diff --git a/.asf.yaml b/.asf.yaml index 40b961dc6e885..a1c6434587703 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -21,10 +21,12 @@ github: collaborators: - anjakefala - benibus - - danepitkin - davisusanibar + - jbonofre - js8544 - vibhatha + - zanmato1984 + - ZhangHuiGui notifications: commits: commits@arrow.apache.org diff --git a/.env b/.env index f379ca14cd205..27474b2c73199 100644 --- a/.env +++ b/.env @@ -56,7 +56,7 @@ UBUNTU=20.04 CLANG_TOOLS=14 CUDA=11.2.2 DASK=latest -DOTNET=7.0 +DOTNET=8.0 GCC_VERSION="" GO=1.21.8 STATICCHECK=v0.4.7 @@ -71,12 +71,12 @@ NUMBA=latest NUMPY=latest PANDAS=latest PYTHON=3.8 -R=4.2 +R=4.4 SPARK=master TURBODBC=latest -# These correspond to images on Docker Hub that contain R, e.g. rhub/ubuntu-gcc-release:latest -R_IMAGE=ubuntu-gcc-release +# These correspond to images on Docker Hub that contain R, e.g. rhub/ubuntu-release:latest +R_IMAGE=ubuntu-release R_ORG=rhub R_TAG=latest @@ -86,9 +86,6 @@ ARROW_R_DEV=TRUE R_PRUNE_DEPS=FALSE TZ=UTC -# Any non-empty string will install devtoolset-${DEVTOOLSET_VERSION} -DEVTOOLSET_VERSION= - # Used through docker-compose.yml and serves as the default version for the # ci/scripts/install_vcpkg.sh script. Prefer to use short SHAs to keep the # docker tags more readable. diff --git a/.github/dependabot.yml b/.github/dependabot.yml index e96cb8d2eb1e3..7d9ff2f42e887 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -23,30 +23,35 @@ updates: interval: "weekly" commit-message: prefix: "MINOR: [CI] " + open-pull-requests-limit: 10 - package-ecosystem: "gomod" directory: "/go/" schedule: interval: "weekly" commit-message: prefix: "MINOR: [Go] " + open-pull-requests-limit: 10 - package-ecosystem: "maven" directory: "/java/" schedule: interval: "weekly" commit-message: prefix: "MINOR: [Java] " + open-pull-requests-limit: 10 - package-ecosystem: "npm" directory: "/js/" schedule: interval: "monthly" commit-message: prefix: "MINOR: [JS] " + open-pull-requests-limit: 10 - package-ecosystem: "nuget" directory: "/csharp/" schedule: interval: "weekly" commit-message: prefix: "MINOR: [C#] " + open-pull-requests-limit: 10 ignore: - dependency-name: "Microsoft.Extensions.*" update-types: diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index dd5abbe1b4b1b..e539fadb859fe 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -21,6 +21,7 @@ on: push: paths: - '.github/workflows/cpp.yml' + - 'ci/conda_env_*' - 'ci/docker/**' - 'ci/scripts/cpp_*' - 'ci/scripts/install_azurite.sh' @@ -35,6 +36,7 @@ on: pull_request: paths: - '.github/workflows/cpp.yml' + - 'ci/conda_env_*' - 'ci/docker/**' - 'ci/scripts/cpp_*' - 'ci/scripts/install_azurite.sh' diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 17ef2de81088f..e4db9f482e206 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -46,7 +46,7 @@ jobs: strategy: fail-fast: false matrix: - dotnet: ['7.0.x'] + dotnet: ['8.0.x'] steps: - name: Install C# uses: actions/setup-dotnet@v4 @@ -74,7 +74,7 @@ jobs: strategy: fail-fast: false matrix: - dotnet: ['7.0.x'] + dotnet: ['8.0.x'] steps: - name: Install C# uses: actions/setup-dotnet@v4 @@ -94,19 +94,23 @@ jobs: run: ci/scripts/csharp_test.sh $(pwd) macos: - name: AMD64 macOS 11 C# ${{ matrix.dotnet }} - runs-on: macos-latest + name: AMD64 macOS 13 C# ${{ matrix.dotnet }} + runs-on: macos-13 # Pending https://github.com/pythonnet/pythonnet/issues/2396 if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 15 strategy: 
fail-fast: false matrix: - dotnet: ['7.0.x'] + dotnet: ['8.0.x'] steps: - name: Install C# uses: actions/setup-dotnet@v4 with: dotnet-version: ${{ matrix.dotnet }} + - name: Setup Python + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + with: + python-version: 3.12 - name: Checkout Arrow uses: actions/checkout@v4 with: diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 7c438be6024d6..1ea12b0a4d23d 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -115,12 +115,12 @@ jobs: - name: Install .NET uses: actions/setup-dotnet@4d6c8fcf3c8f7a60068d26b594648e99df24cee3 # v4.0.0 with: - dotnet-version: '7.0.x' + dotnet-version: '8.0.x' - name: Install Dependencies shell: bash run: | gem install test-unit - pip install "cython>=0.29.31" setuptools six pytest jira + pip install "cython>=0.29.31" setuptools six pytest jira setuptools-scm - name: Run Release Test env: ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index fe49e275d908d..36a0dc014db8d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -32,12 +32,12 @@ env: jobs: complete: - name: AMD64 Ubuntu 22.04 Complete Documentation + name: AMD64 Debian 12 Complete Documentation runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 150 env: - UBUNTU: "22.04" + JDK: 17 steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 @@ -50,8 +50,8 @@ jobs: uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 with: path: .docker - key: ubuntu-docs-${{ hashFiles('cpp/**') }} - restore-keys: ubuntu-docs- + key: debian-docs-${{ hashFiles('cpp/**') }} + restore-keys: debian-docs- - name: Setup Python uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 with: @@ -62,7 +62,8 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: archery docker run ubuntu-docs + JDK: 17 + run: archery docker run debian-docs - name: Docker Push if: >- success() && @@ -73,4 +74,4 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} continue-on-error: true - run: archery docker push ubuntu-docs + run: archery docker push debian-docs diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index 376c87651d2d0..947e2ac21b83c 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -31,7 +31,7 @@ on: permissions: contents: read - + env: ARCHERY_DEBUG: 1 ARCHERY_USE_DOCKER_CLI: 1 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6e09ad61480a6..f53f4aeb505d2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -75,6 +75,11 @@ jobs: with: repository: apache/arrow-rs path: rust + - name: Checkout Arrow nanoarrow + uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 + with: + repository: apache/arrow-nanoarrow + path: nanoarrow - name: Free up disk space run: | ci/scripts/util_free_space.sh @@ -97,6 +102,7 @@ jobs: run: > archery docker run \ -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ + -e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \ -e ARCHERY_INTEGRATION_WITH_RUST=1 \ conda-integration - name: Docker Push diff --git a/.github/workflows/issue_bot.yml b/.github/workflows/issue_bot.yml 
index ec614ca1e7c56..2725825b56952 100644 --- a/.github/workflows/issue_bot.yml +++ b/.github/workflows/issue_bot.yml @@ -21,7 +21,6 @@ on: issues: types: - opened - - edited permissions: contents: read diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index e92d3f4fc5877..e31f7a4fc4d27 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -86,11 +86,11 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: | archery docker run \ -e CI=true \ - -e "GRADLE_ENTERPRISE_ACCESS_KEY=$GRADLE_ENTERPRISE_ACCESS_KEY" \ + -e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \ ${{ matrix.image }} - name: Docker Push if: >- @@ -127,12 +127,12 @@ jobs: - name: Build shell: bash env: - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: ci/scripts/java_build.sh $(pwd) $(pwd)/build - name: Test shell: bash env: - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: ci/scripts/java_test.sh $(pwd) $(pwd)/build windows: @@ -158,10 +158,10 @@ jobs: - name: Build shell: bash env: - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: ci/scripts/java_build.sh $(pwd) $(pwd)/build - name: Test shell: bash env: - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: ci/scripts/java_test.sh $(pwd) $(pwd)/build diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index 958216ac7669d..059a7430a38ce 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -120,11 +120,11 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: | archery docker run \ -e CI=true \ - -e "GRADLE_ENTERPRISE_ACCESS_KEY=$GRADLE_ENTERPRISE_ACCESS_KEY" \ + -e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \ conda-python-java-integration - name: Docker Push if: >- diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index c9b7d7b742d88..e03d0c2dadce0 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -106,10 +106,10 @@ jobs: node-version: ${{ matrix.node }} - name: Build shell: bash - run: ci/scripts/js_build.sh $(pwd) + run: ci/scripts/js_build.sh $(pwd) build - name: Test shell: bash - run: ci/scripts/js_test.sh $(pwd) + run: ci/scripts/js_test.sh $(pwd) build windows: name: AMD64 Windows NodeJS ${{ matrix.node }} @@ -136,7 +136,7 @@ jobs: node-version: ${{ matrix.node }} - name: Build shell: bash - run: ci/scripts/js_build.sh $(pwd) + run: ci/scripts/js_build.sh $(pwd) build - name: Test shell: bash - run: ci/scripts/js_test.sh $(pwd) + run: ci/scripts/js_test.sh $(pwd) build diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index aa3692e587961..ca8280927f4a5 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -70,7 +70,7 @@ jobs: - name: Install MATLAB uses: matlab-actions/setup-matlab@v2 with: - release: R2023a + release: R2024a - name: Install ccache run: sudo apt-get install ccache - name: Setup ccache @@ -96,10 +96,18 @@ 
jobs: uses: matlab-actions/run-tests@v2 with: select-by-folder: matlab/test + strict: true macos: - name: AMD64 macOS 12 MATLAB - runs-on: macos-12 + name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} MATLAB + runs-on: macos-${{ matrix.macos-version }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + strategy: + matrix: + include: + - architecture: AMD64 + macos-version: "12" + - architecture: ARM64 + macos-version: "14" steps: - name: Check out repository uses: actions/checkout@v4 @@ -110,7 +118,7 @@ jobs: - name: Install MATLAB uses: matlab-actions/setup-matlab@v2 with: - release: R2023a + release: R2024a - name: Install ccache run: brew install ccache - name: Setup ccache @@ -135,7 +143,8 @@ jobs: MATLABPATH: matlab/install/arrow_matlab uses: matlab-actions/run-tests@v2 with: - select-by-folder: matlab/test + select-by-folder: matlab/test + strict: true windows: name: AMD64 Windows 2022 MATLAB runs-on: windows-2022 @@ -148,7 +157,7 @@ jobs: - name: Install MATLAB uses: matlab-actions/setup-matlab@v2 with: - release: R2023a + release: R2024a - name: Download Timezone Database shell: bash run: ci/scripts/download_tz_database.sh @@ -181,4 +190,5 @@ jobs: MATLABPATH: matlab/install/arrow_matlab uses: matlab-actions/run-tests@v2 with: - select-by-folder: matlab/test + select-by-folder: matlab/test + strict: true diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 15056961f8cf4..a568f8346e7fc 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -182,6 +182,19 @@ jobs: python -m pip install \ -r python/requirements-build.txt \ -r python/requirements-test.txt + - name: Setup ccache + shell: bash + run: ci/scripts/ccache_setup.sh + - name: ccache info + id: ccache-info + shell: bash + run: echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT + - name: Cache ccache + uses: actions/cache@v4 + with: + path: ${{ steps.ccache-info.outputs.cache-dir }} + key: python-ccache-macos-${{ matrix.macos-version }}-${{ hashFiles('cpp/**', 'python/**') }} + restore-keys: python-ccache-macos-${{ matrix.macos-version }}- - name: Build shell: bash run: | diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 05c85fa6dc2c2..6bd940f806775 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -121,7 +121,7 @@ jobs: strategy: fail-fast: false matrix: - r: ["4.3"] + r: ["4.4"] ubuntu: [20.04] force-tests: ["true"] env: @@ -192,12 +192,11 @@ jobs: fail-fast: false matrix: config: - - { org: "rhub", image: "debian-gcc-devel", tag: "latest", devtoolset: "" } + - { org: "rhub", image: "ubuntu-gcc12", tag: "latest" } env: R_ORG: ${{ matrix.config.org }} R_IMAGE: ${{ matrix.config.image }} R_TAG: ${{ matrix.config.tag }} - DEVTOOLSET_VERSION: ${{ matrix.config.devtoolset }} steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 @@ -371,11 +370,12 @@ jobs: MAKEFLAGS = paste0("-j", parallel::detectCores()), ARROW_R_DEV = TRUE, "_R_CHECK_FORCE_SUGGESTS_" = FALSE, - "_R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_" = TRUE + "_R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_" = TRUE, + "_R_CHECK_DONTTEST_EXAMPLES_" = TRUE ) rcmdcheck::rcmdcheck(".", build_args = '--no-build-vignettes', - args = c('--no-manual', '--as-cran', '--ignore-vignettes', '--run-donttest'), + args = c('--no-manual', '--as-cran', '--ignore-vignettes'), error_on = 'warning', check_dir = 'check', timeout = 3600 diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 
11e3c93ed0806..eb00bc5f92a8d 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -186,7 +186,7 @@ jobs: shell: bash run: ci/scripts/ruby_test.sh $(pwd) $(pwd)/build - windows: + windows-mingw: name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} GLib & Ruby runs-on: windows-2019 if: ${{ !contains(github.event.pull_request.title, 'WIP') }} @@ -197,9 +197,7 @@ jobs: mingw-n-bits: - 64 ruby-version: - # TODO: Use the latest Ruby again when we fix GH-39130. - # - ruby - - "3.1" + - ruby env: ARROW_BUILD_STATIC: OFF ARROW_BUILD_TESTS: OFF @@ -267,7 +265,6 @@ jobs: ridk exec bash ci\scripts\cpp_build.sh "${source_dir}" "${build_dir}" - name: Build GLib run: | - $Env:CMAKE_BUILD_PARALLEL_LEVEL = $Env:NUMBER_OF_PROCESSORS $source_dir = "$(ridk exec cygpath --unix "$(Get-Location)")" $build_dir = "$(ridk exec cygpath --unix "$(Get-Location)\build")" $ErrorActionPreference = "Continue" @@ -305,3 +302,117 @@ jobs: $Env:MAKE = "ridk exec make" $ErrorActionPreference = "Continue" rake -f ruby\Rakefile + + windows-msvc: + name: AMD64 Windows MSVC GLib + runs-on: windows-2019 + if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 90 + strategy: + fail-fast: false + env: + ARROW_ACERO: ON + ARROW_BOOST_USE_SHARED: OFF + ARROW_BUILD_BENCHMARKS: OFF + ARROW_BUILD_SHARED: ON + ARROW_BUILD_STATIC: OFF + ARROW_BUILD_TESTS: OFF + ARROW_DATASET: ON + ARROW_DEPENDENCY_SOURCE: VCPKG + ARROW_DEPENDENCY_USE_SHARED: OFF + ARROW_FLIGHT: ON + ARROW_FLIGHT_SQL: ON + ARROW_GANDIVA: OFF + ARROW_HDFS: OFF + ARROW_HOME: "${{ github.workspace }}/dist" + ARROW_JEMALLOC: OFF + ARROW_MIMALLOC: ON + ARROW_ORC: OFF + ARROW_PARQUET: ON + ARROW_SUBSTRAIT: OFF + ARROW_USE_GLOG: OFF + ARROW_VERBOSE_THIRDPARTY_BUILD: OFF + ARROW_WITH_BROTLI: OFF + ARROW_WITH_BZ2: OFF + ARROW_WITH_LZ4: OFF + ARROW_WITH_OPENTELEMETRY: OFF + ARROW_WITH_SNAPPY: ON + ARROW_WITH_ZLIB: ON + ARROW_WITH_ZSTD: ON + CMAKE_CXX_STANDARD: "17" + CMAKE_GENERATOR: Ninja + CMAKE_INSTALL_PREFIX: "${{ github.workspace }}/dist" + CMAKE_UNITY_BUILD: ON + VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite' + VCPKG_ROOT: "${{ github.workspace }}/vcpkg" + permissions: + packages: write + steps: + - name: Disable Crash Dialogs + run: | + reg add ` + "HKCU\SOFTWARE\Microsoft\Windows\Windows Error Reporting" ` + /v DontShowUI ` + /t REG_DWORD ` + /d 1 ` + /f + - name: Checkout Arrow + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + - name: Install vcpkg + shell: bash + run: | + ci/scripts/install_vcpkg.sh "${VCPKG_ROOT}" + - name: Install meson + run: | + python -m pip install meson + - name: Install ccache + shell: bash + run: | + ci/scripts/install_ccache.sh 4.6.3 /usr + - name: Setup ccache + shell: bash + run: | + ci/scripts/ccache_setup.sh + - name: ccache info + id: ccache-info + shell: bash + run: | + echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT + - name: Cache ccache + uses: actions/cache@v4 + with: + path: ${{ steps.ccache-info.outputs.cache-dir }} + key: glib-ccache-msvc-${{ env.CACHE_VERSION }}-${{ hashFiles('cpp/**') }} + restore-keys: glib-ccache-msvc-${{ env.CACHE_VERSION }}- + env: + # We can invalidate the current cache by updating this. 
+ CACHE_VERSION: "2024-05-09" + - name: Setup NuGet credentials for vcpkg caching + shell: bash + run: | + $(vcpkg/vcpkg.exe fetch nuget | tail -n 1) \ + sources add \ + -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" \ + -storepasswordincleartext \ + -name "GitHub" \ + -username "$GITHUB_REPOSITORY_OWNER" \ + -password "${{ secrets.GITHUB_TOKEN }}" + $(vcpkg/vcpkg.exe fetch nuget | tail -n 1) \ + setapikey "${{ secrets.GITHUB_TOKEN }}" \ + -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" + - name: Build C++ vcpkg dependencies + run: | + vcpkg\vcpkg.exe install --triplet x64-windows --x-manifest-root cpp --x-install-root build\cpp\vcpkg_installed + - name: Build C++ + shell: cmd + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build" + - name: Build GLib + shell: cmd + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + bash -c "ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build" diff --git a/.gitignore b/.gitignore index c7f5aa90e18e6..52ffa6c6124c2 100644 --- a/.gitignore +++ b/.gitignore @@ -102,4 +102,9 @@ __debug_bin .envrc # Develocity -.mvn/.gradle-enterprise/ +java/.mvn/.gradle-enterprise/ +java/.mvn/.develocity/ + +# rat +filtered_rat.txt +rat.txt diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from b/.golangci.yaml similarity index 77% rename from dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from rename to .golangci.yaml index 34187b2af5a74..7d486a9e85a0a 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from +++ b/.golangci.yaml @@ -15,4 +15,15 @@ # specific language governing permissions and limitations # under the License. -arm64v8/debian:bullseye +linters: + # Disable all linters. 
+ # Default: false + disable-all: true + # Enable specific linter + # https://golangci-lint.run/usage/linters/#enabled-by-default + enable: + - gofmt + - goimports + +issues: + fix: true \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2e598e0a95064..05bf8e54f9cdb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,7 +40,7 @@ repos: hooks: - id: flake8 name: Python Format - files: ^(python|dev|integration)/ + files: ^(python|dev|c_glib|integration)/ types: - file - python @@ -116,19 +116,42 @@ repos: name: CMake Format files: >- ( + ?.*CMakeLists\.txt$| ?^ci/.*/.*\.cmake$| ?^cpp/.*/.*\.cmake\.in$| ?^cpp/.*/.*\.cmake$| - ?^cpp/.*/CMakeLists\.txt$| - ?^go/.*/CMakeLists\.txt$| - ?^java/.*/CMakeLists\.txt$| - ?^matlab/.*/CMakeLists\.txt$| - ?^python/.*/CMakeLists\.txt$| ) exclude: >- ( + ?^ci/conan/all/.*CMakeLists\.txt$| ?^cpp/cmake_modules/FindNumPy\.cmake$| ?^cpp/cmake_modules/FindPythonLibsNew\.cmake$| ?^cpp/cmake_modules/UseCython\.cmake$| - ?^cpp/src/arrow/util/config\.h\.cmake$| + ?^cpp/src/arrow/util/.*\.h\.cmake$| ) + - repo: https://github.com/sphinx-contrib/sphinx-lint + rev: v0.9.1 + hooks: + - id: sphinx-lint + files: ^docs/source + exclude: ^docs/source/python/generated + args: [ + '--enable', + 'all', + '--disable', + 'dangling-hyphen,line-too-long', + ] + - repo: https://github.com/golangci/golangci-lint + rev: v1.59.0 + hooks: + # no built-in support for multiple go.mod + # https://github.com/golangci/golangci-lint/issues/828 + - id: golangci-lint-full + name: golangci-lint-full-arrow + entry: bash -c 'cd go/arrow && golangci-lint run' + - id: golangci-lint-full + name: golangci-lint-full-parquet + entry: bash -c 'cd go/parquet && golangci-lint run' + - id: golangci-lint-full + name: golangci-lint-full-internal + entry: bash -c 'cd go/internal && golangci-lint run' diff --git a/c_glib/arrow-cuda-glib/arrow-cuda-glib.h b/c_glib/arrow-cuda-glib/arrow-cuda-glib.h index b3c7f21087669..572ff92ed9b43 100644 --- a/c_glib/arrow-cuda-glib/arrow-cuda-glib.h +++ b/c_glib/arrow-cuda-glib/arrow-cuda-glib.h @@ -21,4 +21,6 @@ #include +#include + #include diff --git a/c_glib/arrow-cuda-glib/cuda.h b/c_glib/arrow-cuda-glib/cuda.h index 863743a620bf8..f04a3381259bb 100644 --- a/c_glib/arrow-cuda-glib/cuda.h +++ b/c_glib/arrow-cuda-glib/cuda.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GARROW_CUDA_TYPE_DEVICE_MANAGER (garrow_cuda_device_manager_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDADeviceManager, garrow_cuda_device_manager, GARROW_CUDA, @@ -35,6 +38,7 @@ struct _GArrowCUDADeviceManagerClass }; #define GARROW_CUDA_TYPE_CONTEXT (garrow_cuda_context_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GArrowCUDAContext, garrow_cuda_context, GARROW_CUDA, CONTEXT, GObject) struct _GArrowCUDAContextClass @@ -43,6 +47,7 @@ struct _GArrowCUDAContextClass }; #define GARROW_CUDA_TYPE_BUFFER (garrow_cuda_buffer_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GArrowCUDABuffer, garrow_cuda_buffer, GARROW_CUDA, BUFFER, GArrowBuffer) struct _GArrowCUDABufferClass @@ -51,6 +56,7 @@ struct _GArrowCUDABufferClass }; #define GARROW_CUDA_TYPE_HOST_BUFFER (garrow_cuda_host_buffer_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDAHostBuffer, garrow_cuda_host_buffer, GARROW_CUDA, @@ -62,6 +68,7 @@ struct _GArrowCUDAHostBufferClass }; #define GARROW_CUDA_TYPE_IPC_MEMORY_HANDLE (garrow_cuda_ipc_memory_handle_get_type()) 
+GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDAIPCMemoryHandle, garrow_cuda_ipc_memory_handle, GARROW_CUDA, @@ -73,6 +80,7 @@ struct _GArrowCUDAIPCMemoryHandleClass }; #define GARROW_CUDA_TYPE_BUFFER_INPUT_STREAM (garrow_cuda_buffer_input_stream_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDABufferInputStream, garrow_cuda_buffer_input_stream, GARROW_CUDA, @@ -85,6 +93,7 @@ struct _GArrowCUDABufferInputStreamClass #define GARROW_CUDA_TYPE_BUFFER_OUTPUT_STREAM \ (garrow_cuda_buffer_output_stream_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDABufferOutputStream, garrow_cuda_buffer_output_stream, GARROW_CUDA, @@ -95,71 +104,100 @@ struct _GArrowCUDABufferOutputStreamClass GArrowOutputStreamClass parent_class; }; +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDADeviceManager * garrow_cuda_device_manager_new(GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAContext * garrow_cuda_device_manager_get_context(GArrowCUDADeviceManager *manager, gint gpu_number, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 gsize garrow_cuda_device_manager_get_n_devices(GArrowCUDADeviceManager *manager); +GARROW_CUDA_AVAILABLE_IN_0_12 gint64 garrow_cuda_context_get_allocated_size(GArrowCUDAContext *context); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABuffer * garrow_cuda_buffer_new(GArrowCUDAContext *context, gint64 size, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABuffer * garrow_cuda_buffer_new_ipc(GArrowCUDAContext *context, GArrowCUDAIPCMemoryHandle *handle, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABuffer * garrow_cuda_buffer_new_record_batch(GArrowCUDAContext *context, GArrowRecordBatch *record_batch, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GBytes * garrow_cuda_buffer_copy_to_host(GArrowCUDABuffer *buffer, gint64 position, gint64 size, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 gboolean garrow_cuda_buffer_copy_from_host(GArrowCUDABuffer *buffer, const guint8 *data, gint64 size, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAIPCMemoryHandle * garrow_cuda_buffer_export(GArrowCUDABuffer *buffer, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAContext * garrow_cuda_buffer_get_context(GArrowCUDABuffer *buffer); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowRecordBatch * garrow_cuda_buffer_read_record_batch(GArrowCUDABuffer *buffer, GArrowSchema *schema, GArrowReadOptions *options, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAHostBuffer * garrow_cuda_host_buffer_new(gint gpu_number, gint64 size, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAIPCMemoryHandle * garrow_cuda_ipc_memory_handle_new(const guint8 *data, gsize size, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowBuffer * garrow_cuda_ipc_memory_handle_serialize(GArrowCUDAIPCMemoryHandle *handle, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABufferInputStream * garrow_cuda_buffer_input_stream_new(GArrowCUDABuffer *buffer); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABufferOutputStream * garrow_cuda_buffer_output_stream_new(GArrowCUDABuffer *buffer); +GARROW_CUDA_AVAILABLE_IN_0_12 gboolean garrow_cuda_buffer_output_stream_set_buffer_size(GArrowCUDABufferOutputStream *stream, gint64 size, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 gint64 garrow_cuda_buffer_output_stream_get_buffer_size(GArrowCUDABufferOutputStream *stream); + +GARROW_CUDA_AVAILABLE_IN_0_12 gint64 garrow_cuda_buffer_output_stream_get_buffered_size(GArrowCUDABufferOutputStream *stream); diff --git 
a/c_glib/arrow-cuda-glib/meson.build b/c_glib/arrow-cuda-glib/meson.build index 88029e6dc2073..47bed70f03b60 100644 --- a/c_glib/arrow-cuda-glib/meson.build +++ b/c_glib/arrow-cuda-glib/meson.build @@ -31,10 +31,17 @@ cpp_headers = files( 'cuda.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GARROW_CUDA', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-cuda-glib') - dependencies = [ arrow_cuda, arrow_glib, @@ -45,6 +52,7 @@ libarrow_cuda_glib = library('arrow-cuda-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGARROW_CUDA_COMPILATION'], soversion: so_version, version: library_version) arrow_cuda_glib = declare_dependency(link_with: libarrow_cuda_glib, diff --git a/c_glib/arrow-cuda-glib/version.h.in b/c_glib/arrow-cuda-glib/version.h.in new file mode 100644 index 0000000000000..0ab5bfd562b41 --- /dev/null +++ b/c_glib/arrow-cuda-glib/version.h.in @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/version.h> + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-cuda-glib/arrow-cuda-glib.h + * + * Apache Arrow CUDA GLib provides macros that can be used by the C pre-processor. + * They are useful to check version-related things at compile time. + */ + +/** + * GARROW_CUDA_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GARROW_CUDA_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GARROW_CUDA_VERSION_MICRO: + * + * The micro version. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GARROW_CUDA_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for a snapshot version. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_TAG "@VERSION_TAG@" + +/** + * GARROW_CUDA_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in the C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow CUDA GLib version is the + * same as or newer than the passed version, %FALSE otherwise.
+ * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_CHECK(major, minor, micro) \ + (GARROW_CUDA_VERSION_MAJOR > (major) || \ + (GARROW_CUDA_VERSION_MAJOR == (major) && \ + GARROW_CUDA_VERSION_MINOR > (minor)) || \ + (GARROW_CUDA_VERSION_MAJOR == (major) && \ + GARROW_CUDA_VERSION_MINOR == (minor) && \ + GARROW_CUDA_VERSION_MICRO >= (micro))) + +/** + * GARROW_CUDA_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecation warnings are produced. + * + * You must define this macro before including the + * arrow-cuda-glib/arrow-cuda-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GARROW_CUDA_DISABLE_DEPRECATION_WARNINGS +# define GARROW_CUDA_DEPRECATED +# define GARROW_CUDA_DEPRECATED_FOR(function) +# define GARROW_CUDA_UNAVAILABLE(major, minor) +#else +# define GARROW_CUDA_DEPRECATED G_DEPRECATED +# define GARROW_CUDA_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GARROW_CUDA_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GARROW_CUDA_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile-time API version checks. + * + * This macro value must be one of the predefined version macros such + * as %GARROW_CUDA_VERSION_0_10. + * + * If you use any functions that are defined by a newer version than + * %GARROW_CUDA_VERSION_MIN_REQUIRED, deprecation warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-cuda-glib/arrow-cuda-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GARROW_CUDA_VERSION_MIN_REQUIRED +# define GARROW_CUDA_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GARROW_CUDA_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile-time API version checks. + * + * This macro value must be one of the predefined version macros such + * as %GARROW_CUDA_VERSION_0_10. + * + * If you use any functions that are defined by a newer version than + * %GARROW_CUDA_VERSION_MAX_ALLOWED, deprecation warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-cuda-glib/arrow-cuda-glib.h header.
+ * + * Since: 17.0.0 + */ +#ifndef GARROW_CUDA_VERSION_MAX_ALLOWED +# define GARROW_CUDA_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@VISIBILITY_MACROS@ + +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-dataset-glib/arrow-dataset-glib.h b/c_glib/arrow-dataset-glib/arrow-dataset-glib.h index 58f4e216cc715..7ebf36ddd2b78 100644 --- a/c_glib/arrow-dataset-glib/arrow-dataset-glib.h +++ b/c_glib/arrow-dataset-glib/arrow-dataset-glib.h @@ -21,6 +21,8 @@ #include +#include + #include #include #include diff --git a/c_glib/arrow-dataset-glib/dataset-definition.h b/c_glib/arrow-dataset-glib/dataset-definition.h index f278b05a135f5..bc52d6d3663a3 100644 --- a/c_glib/arrow-dataset-glib/dataset-definition.h +++ b/c_glib/arrow-dataset-glib/dataset-definition.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GADATASET_TYPE_DATASET (gadataset_dataset_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetDataset, gadataset_dataset, GADATASET, DATASET, GObject) struct _GADatasetDatasetClass { diff --git a/c_glib/arrow-dataset-glib/dataset-factory.h b/c_glib/arrow-dataset-glib/dataset-factory.h index 1dc875837fe21..e7d3bc27aea8f 100644 --- a/c_glib/arrow-dataset-glib/dataset-factory.h +++ b/c_glib/arrow-dataset-glib/dataset-factory.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GADATASET_TYPE_FINISH_OPTIONS (gadataset_finish_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFinishOptions, gadataset_finish_options, GADATASET, FINISH_OPTIONS, GObject) struct _GADatasetFinishOptionsClass @@ -31,11 +32,12 @@ struct _GADatasetFinishOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetFinishOptions * gadataset_finish_options_new(void); #define GADATASET_TYPE_DATASET_FACTORY (gadataset_dataset_factory_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GADatasetDatasetFactory, gadataset_dataset_factory, GADATASET, DATASET_FACTORY, GObject) struct _GADatasetDatasetFactoryClass @@ -43,7 +45,7 @@ struct _GADatasetDatasetFactoryClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetDataset * gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory, GADatasetFinishOptions *options, @@ -51,6 +53,7 @@ gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory, #define GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY \ (gadataset_file_system_dataset_factory_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetFactory, gadataset_file_system_dataset_factory, GADATASET, @@ -61,32 +64,33 @@ struct _GADatasetFileSystemDatasetFactoryClass GADatasetDatasetFactoryClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetFileSystemDatasetFactory * gadataset_file_system_dataset_factory_new(GADatasetFileFormat *file_format); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_set_file_system( GADatasetFileSystemDatasetFactory *factory, GArrowFileSystem *file_system, GError **error); +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_set_file_system_uri( GADatasetFileSystemDatasetFactory *factory, const gchar *uri, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_add_path(GADatasetFileSystemDatasetFactory *factory, const gchar *path, GError **error); /* -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean 
gadataset_file_system_dataset_factory_add_file( GADatasetFileSystemDatasetFactory *factory, GArrowFileInfo *file, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_add_selector( GADatasetFileSystemDatasetFactory *factory, @@ -94,7 +98,7 @@ gadataset_file_system_dataset_factory_add_selector( GError **error); */ -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetFileSystemDataset * gadataset_file_system_dataset_factory_finish(GADatasetFileSystemDatasetFactory *factory, GADatasetFinishOptions *options, diff --git a/c_glib/arrow-dataset-glib/dataset.cpp b/c_glib/arrow-dataset-glib/dataset.cpp index 704d6b589ee94..f84e4e3db380a 100644 --- a/c_glib/arrow-dataset-glib/dataset.cpp +++ b/c_glib/arrow-dataset-glib/dataset.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -152,12 +153,46 @@ gadataset_dataset_to_table(GADatasetDataset *dataset, GError **error) } auto arrow_scanner = *arrow_scanner_result; auto arrow_table_result = arrow_scanner->ToTable(); - if (!garrow::check(error, arrow_scanner_result, "[dataset][to-table]")) { + if (!garrow::check(error, arrow_table_result, "[dataset][to-table]")) { return NULL; } return garrow_table_new_raw(&(*arrow_table_result)); } +/** + * gadataset_dataset_to_record_batch_reader: + * @dataset: A #GADatasetDataset. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A #GArrowRecordBatchReader on success, %NULL on error. + * + * Since: 17.0.0 + */ +GArrowRecordBatchReader * +gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError **error) +{ + auto arrow_dataset = gadataset_dataset_get_raw(dataset); + auto arrow_scanner_builder_result = arrow_dataset->NewScan(); + if (!garrow::check(error, + arrow_scanner_builder_result, + "[dataset][to-record-batch-reader]")) { + return nullptr; + } + auto arrow_scanner_builder = *arrow_scanner_builder_result; + auto arrow_scanner_result = arrow_scanner_builder->Finish(); + if (!garrow::check(error, arrow_scanner_result, "[dataset][to-record-batch-reader]")) { + return nullptr; + } + auto arrow_scanner = *arrow_scanner_result; + auto arrow_reader_result = arrow_scanner->ToRecordBatchReader(); + if (!garrow::check(error, arrow_reader_result, "[dataset][to-record-batch-reader]")) { + return nullptr; + } + auto sources = g_list_prepend(nullptr, dataset); + return garrow_record_batch_reader_new_raw(&(*arrow_reader_result), sources); +} + /** * gadataset_dataset_get_type_name: * @dataset: A #GADatasetDataset. 
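The gadataset_dataset_to_record_batch_reader() addition above gives the GLib bindings a streaming path: instead of materializing the whole dataset into a table, callers pull record batches one at a time, and the reader keeps the dataset alive through the source list attached to it. A minimal caller sketch, assuming a GADatasetDataset obtained elsewhere (for example from gadataset_file_system_dataset_factory_finish()); the helper name count_dataset_batches is hypothetical, and garrow_record_batch_reader_read_next() returns NULL with no error at end of stream:

#include <arrow-dataset-glib/arrow-dataset-glib.h>

/* Count the record batches produced by lazily scanning a dataset. */
static guint
count_dataset_batches(GADatasetDataset *dataset, GError **error)
{
  GArrowRecordBatchReader *reader =
    gadataset_dataset_to_record_batch_reader(dataset, error);
  guint n_batches = 0;
  if (!reader)
    return 0;
  while (TRUE) {
    GError *local_error = NULL;
    GArrowRecordBatch *batch =
      garrow_record_batch_reader_read_next(reader, &local_error);
    if (local_error) {
      g_propagate_error(error, local_error);
      break;
    }
    if (!batch)
      break;                    /* end of stream */
    n_batches++;
    g_object_unref(batch);
  }
  g_object_unref(reader);
  return n_batches;
}

The same pattern applies to gadataset_scanner_to_record_batch_reader() further below when a configured GADatasetScanner is used instead of a bare dataset.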
diff --git a/c_glib/arrow-dataset-glib/dataset.h b/c_glib/arrow-dataset-glib/dataset.h index 57f6c7729f073..5b957f0538a2a 100644 --- a/c_glib/arrow-dataset-glib/dataset.h +++ b/c_glib/arrow-dataset-glib/dataset.h @@ -25,18 +25,22 @@ G_BEGIN_DECLS -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetScannerBuilder * gadataset_dataset_begin_scan(GADatasetDataset *dataset, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GArrowTable * gadataset_dataset_to_table(GADatasetDataset *dataset, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gchar * gadataset_dataset_get_type_name(GADatasetDataset *dataset); +GADATASET_AVAILABLE_IN_17_0 +GArrowRecordBatchReader * +gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError **error); #define GADATASET_TYPE_FILE_SYSTEM_DATASET_WRITE_OPTIONS \ (gadataset_file_system_dataset_write_options_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetWriteOptions, gadataset_file_system_dataset_write_options, GADATASET, @@ -47,11 +51,12 @@ struct _GADatasetFileSystemDatasetWriteOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetFileSystemDatasetWriteOptions * gadataset_file_system_dataset_write_options_new(void); #define GADATASET_TYPE_FILE_SYSTEM_DATASET (gadataset_file_system_dataset_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDataset, gadataset_file_system_dataset, GADATASET, @@ -62,7 +67,7 @@ struct _GADatasetFileSystemDatasetClass GADatasetDatasetClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_system_dataset_write_scanner( GADatasetScanner *scanner, diff --git a/c_glib/arrow-dataset-glib/file-format.h b/c_glib/arrow-dataset-glib/file-format.h index 29487e59d70dd..f70523597e7c6 100644 --- a/c_glib/arrow-dataset-glib/file-format.h +++ b/c_glib/arrow-dataset-glib/file-format.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GADATASET_TYPE_FILE_WRITE_OPTIONS (gadataset_file_write_options_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileWriteOptions, gadataset_file_write_options, GADATASET, @@ -35,6 +38,7 @@ struct _GADatasetFileWriteOptionsClass }; #define GADATASET_TYPE_FILE_WRITER (gadataset_file_writer_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFileWriter, gadataset_file_writer, GADATASET, FILE_WRITER, GObject) struct _GADatasetFileWriterClass @@ -42,21 +46,22 @@ struct _GADatasetFileWriterClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_writer_write_record_batch(GADatasetFileWriter *writer, GArrowRecordBatch *record_batch, GError **error); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_writer_write_record_batch_reader(GADatasetFileWriter *writer, GArrowRecordBatchReader *reader, GError **error); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_writer_finish(GADatasetFileWriter *writer, GError **error); #define GADATASET_TYPE_FILE_FORMAT (gadataset_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFileFormat, gadataset_file_format, GADATASET, FILE_FORMAT, GObject) struct _GADatasetFileFormatClass @@ -64,13 +69,13 @@ struct _GADatasetFileFormatClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 gchar * 
gadataset_file_format_get_type_name(GADatasetFileFormat *format); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetFileWriteOptions * gadataset_file_format_get_default_write_options(GADatasetFileFormat *format); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetFileWriter * gadataset_file_format_open_writer(GADatasetFileFormat *format, GArrowOutputStream *destination, @@ -80,12 +85,13 @@ gadataset_file_format_open_writer(GADatasetFileFormat *format, GADatasetFileWriteOptions *options, GError **error); -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 gboolean gadataset_file_format_equal(GADatasetFileFormat *format, GADatasetFileFormat *other_format); #define GADATASET_TYPE_CSV_FILE_FORMAT (gadataset_csv_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GADatasetCSVFileFormat, gadataset_csv_file_format, GADATASET, @@ -96,11 +102,12 @@ struct _GADatasetCSVFileFormatClass GADatasetFileFormatClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 GADatasetCSVFileFormat * gadataset_csv_file_format_new(void); #define GADATASET_TYPE_IPC_FILE_FORMAT (gadataset_ipc_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GADatasetIPCFileFormat, gadataset_ipc_file_format, GADATASET, @@ -111,11 +118,12 @@ struct _GADatasetIPCFileFormatClass GADatasetFileFormatClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 GADatasetIPCFileFormat * gadataset_ipc_file_format_new(void); #define GADATASET_TYPE_PARQUET_FILE_FORMAT (gadataset_parquet_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GADatasetParquetFileFormat, gadataset_parquet_file_format, GADATASET, @@ -126,7 +134,7 @@ struct _GADatasetParquetFileFormatClass GADatasetFileFormatClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 GADatasetParquetFileFormat * gadataset_parquet_file_format_new(void); diff --git a/c_glib/arrow-dataset-glib/fragment.h b/c_glib/arrow-dataset-glib/fragment.h index 49acc360a3679..80eb9e19df3cc 100644 --- a/c_glib/arrow-dataset-glib/fragment.h +++ b/c_glib/arrow-dataset-glib/fragment.h @@ -21,11 +21,14 @@ #include +#include + G_BEGIN_DECLS /* arrow::dataset::Fragment */ #define GADATASET_TYPE_FRAGMENT (gadataset_fragment_get_type()) +GADATASET_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFragment, gadataset_fragment, GADATASET, FRAGMENT, GObject) struct _GADatasetFragmentClass @@ -36,6 +39,7 @@ struct _GADatasetFragmentClass /* arrow::dataset::InMemoryFragment */ #define GADATASET_TYPE_IN_MEMORY_FRAGMENT (gadataset_in_memory_fragment_get_type()) +GADATASET_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GADatasetInMemoryFragment, gadataset_in_memory_fragment, GADATASET, @@ -46,7 +50,7 @@ struct _GADatasetInMemoryFragmentClass GADatasetFragmentClass parent_class; }; -GARROW_AVAILABLE_IN_4_0 +GADATASET_AVAILABLE_IN_4_0 GADatasetInMemoryFragment * gadataset_in_memory_fragment_new(GArrowSchema *schema, GArrowRecordBatch **record_batches, diff --git a/c_glib/arrow-dataset-glib/meson.build b/c_glib/arrow-dataset-glib/meson.build index 0c869a4183efa..2d54efadfa230 100644 --- a/c_glib/arrow-dataset-glib/meson.build +++ b/c_glib/arrow-dataset-glib/meson.build @@ -17,6 +17,8 @@ # specific language governing permissions and limitations # under the License. 
+project_name = 'arrow-dataset-glib' + sources = files( 'dataset-factory.cpp', 'dataset.cpp', @@ -47,20 +49,27 @@ cpp_headers = files( 'scanner.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GADATASET', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + enums = gnome.mkenums('enums', sources: c_headers, identifier_prefix: 'GADataset', symbol_prefix: 'gadataset', c_template: 'enums.c.template', h_template: 'enums.h.template', - install_dir: join_paths(include_dir, 'arrow-dataset-glib'), + install_dir: join_paths(include_dir, project_name), install_header: true) enums_source = enums[0] enums_header = enums[1] - headers = c_headers + cpp_headers -install_headers(headers, subdir: 'arrow-dataset-glib') +install_headers(headers, subdir: project_name) dependencies = [ arrow_dataset, @@ -72,6 +81,7 @@ libarrow_dataset_glib = library('arrow-dataset-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGADATASET_COMPILATION'], soversion: so_version, version: library_version) arrow_dataset_glib = declare_dependency(link_with: libarrow_dataset_glib, diff --git a/c_glib/arrow-dataset-glib/partitioning.h b/c_glib/arrow-dataset-glib/partitioning.h index ccf476272e429..7671958d88e61 100644 --- a/c_glib/arrow-dataset-glib/partitioning.h +++ b/c_glib/arrow-dataset-glib/partitioning.h @@ -21,6 +21,8 @@ #include +#include + G_BEGIN_DECLS /** @@ -39,6 +41,7 @@ typedef enum { #define GADATASET_TYPE_PARTITIONING_FACTORY_OPTIONS \ (gadataset_partitioning_factory_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetPartitioningFactoryOptions, gadataset_partitioning_factory_options, GADATASET, @@ -49,11 +52,12 @@ struct _GADatasetPartitioningFactoryOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetPartitioningFactoryOptions * gadataset_partitioning_factory_options_new(void); #define GADATASET_TYPE_PARTITIONING (gadataset_partitioning_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GADatasetPartitioning, gadataset_partitioning, GADATASET, PARTITIONING, GObject) struct _GADatasetPartitioningClass @@ -61,16 +65,17 @@ struct _GADatasetPartitioningClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gchar * gadataset_partitioning_get_type_name(GADatasetPartitioning *partitioning); -GARROW_AVAILABLE_IN_12_0 +GADATASET_AVAILABLE_IN_12_0 GADatasetPartitioning * gadataset_partitioning_create_default(void); #define GADATASET_TYPE_KEY_VALUE_PARTITIONING_OPTIONS \ (gadataset_key_value_partitioning_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetKeyValuePartitioningOptions, gadataset_key_value_partitioning_options, GADATASET, @@ -81,12 +86,13 @@ struct _GADatasetKeyValuePartitioningOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetKeyValuePartitioningOptions * gadataset_key_value_partitioning_options_new(void); #define GADATASET_TYPE_KEY_VALUE_PARTITIONING \ (gadataset_key_value_partitioning_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetKeyValuePartitioning, gadataset_key_value_partitioning, GADATASET, @@ -99,6 +105,7 @@ struct _GADatasetKeyValuePartitioningClass #define GADATASET_TYPE_DIRECTORY_PARTITIONING \ 
(gadataset_directory_partitioning_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetDirectoryPartitioning, gadataset_directory_partitioning, GADATASET, @@ -109,7 +116,7 @@ struct _GADatasetDirectoryPartitioningClass GADatasetKeyValuePartitioningClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetDirectoryPartitioning * gadataset_directory_partitioning_new(GArrowSchema *schema, GList *dictionaries, @@ -118,6 +125,7 @@ gadataset_directory_partitioning_new(GArrowSchema *schema, #define GADATASET_TYPE_HIVE_PARTITIONING_OPTIONS \ (gadataset_hive_partitioning_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetHivePartitioningOptions, gadataset_hive_partitioning_options, GADATASET, @@ -128,11 +136,12 @@ struct _GADatasetHivePartitioningOptionsClass GADatasetKeyValuePartitioningOptionsClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetHivePartitioningOptions * gadataset_hive_partitioning_options_new(void); #define GADATASET_TYPE_HIVE_PARTITIONING (gadataset_hive_partitioning_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetHivePartitioning, gadataset_hive_partitioning, GADATASET, @@ -143,13 +152,13 @@ struct _GADatasetHivePartitioningClass GADatasetKeyValuePartitioningClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetHivePartitioning * gadataset_hive_partitioning_new(GArrowSchema *schema, GList *dictionaries, GADatasetHivePartitioningOptions *options, GError **error); -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 gchar * gadataset_hive_partitioning_get_null_fallback(GADatasetHivePartitioning *partitioning); diff --git a/c_glib/arrow-dataset-glib/scanner.cpp b/c_glib/arrow-dataset-glib/scanner.cpp index 717532db9220f..28af1f16e5968 100644 --- a/c_glib/arrow-dataset-glib/scanner.cpp +++ b/c_glib/arrow-dataset-glib/scanner.cpp @@ -128,6 +128,28 @@ gadataset_scanner_to_table(GADatasetScanner *scanner, GError **error) } } +/** + * gadataset_scanner_to_record_batch_reader: + * @scanner: A #GADatasetScanner. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A #GArrowRecordBatchReader on success, %NULL on error. 
+ * + * Since: 17.0.0 + */ +GArrowRecordBatchReader * +gadataset_scanner_to_record_batch_reader(GADatasetScanner *scanner, GError **error) +{ + auto arrow_scanner = gadataset_scanner_get_raw(scanner); + auto arrow_reader_result = arrow_scanner->ToRecordBatchReader(); + if (!garrow::check(error, arrow_reader_result, "[scanner][to-record-batch-reader]")) { + return nullptr; + } + auto sources = g_list_prepend(nullptr, scanner); + return garrow_record_batch_reader_new_raw(&(*arrow_reader_result), sources); +} + typedef struct GADatasetScannerBuilderPrivate_ { std::shared_ptr scanner_builder; diff --git a/c_glib/arrow-dataset-glib/scanner.h b/c_glib/arrow-dataset-glib/scanner.h index 3c7432fb268e4..d92eca5ab8420 100644 --- a/c_glib/arrow-dataset-glib/scanner.h +++ b/c_glib/arrow-dataset-glib/scanner.h @@ -21,21 +21,28 @@ #include #include +#include G_BEGIN_DECLS #define GADATASET_TYPE_SCANNER (gadataset_scanner_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetScanner, gadataset_scanner, GADATASET, SCANNER, GObject) struct _GADatasetScannerClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GArrowTable * gadataset_scanner_to_table(GADatasetScanner *scanner, GError **error); +GADATASET_AVAILABLE_IN_17_0 +GArrowRecordBatchReader * +gadataset_scanner_to_record_batch_reader(GADatasetScanner *scanner, GError **error); + #define GADATASET_TYPE_SCANNER_BUILDER (gadataset_scanner_builder_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GADatasetScannerBuilder, gadataset_scanner_builder, GADATASET, SCANNER_BUILDER, GObject) struct _GADatasetScannerBuilderClass @@ -43,20 +50,20 @@ struct _GADatasetScannerBuilderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetScannerBuilder * gadataset_scanner_builder_new(GADatasetDataset *dataset, GError **error); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetScannerBuilder * gadataset_scanner_builder_new_record_batch_reader(GArrowRecordBatchReader *reader); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_scanner_builder_set_filter(GADatasetScannerBuilder *builder, GArrowExpression *expression, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetScanner * gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder, GError **error); diff --git a/c_glib/arrow-dataset-glib/version.h.in b/c_glib/arrow-dataset-glib/version.h.in new file mode 100644 index 0000000000000..7e678bda3a875 --- /dev/null +++ b/c_glib/arrow-dataset-glib/version.h.in @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/version.h> + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * Apache Arrow Dataset GLib provides macros that can be used by the C pre-processor. + * They are useful to check version-related things at compile time. + */ + +/** + * GADATASET_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GADATASET_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GADATASET_VERSION_MICRO: + * + * The micro version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GADATASET_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for a snapshot version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_TAG "@VERSION_TAG@" + +/** + * GADATASET_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in the C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow Dataset GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_CHECK(major, minor, micro) \ + (GADATASET_VERSION_MAJOR > (major) || \ + (GADATASET_VERSION_MAJOR == (major) && \ + GADATASET_VERSION_MINOR > (minor)) || \ + (GADATASET_VERSION_MAJOR == (major) && \ + GADATASET_VERSION_MINOR == (minor) && \ + GADATASET_VERSION_MICRO >= (micro))) + +/** + * GADATASET_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecation warnings are produced. + * + * You must define this macro before including the + * arrow-dataset-glib/arrow-dataset-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GADATASET_DISABLE_DEPRECATION_WARNINGS +# define GADATASET_DEPRECATED +# define GADATASET_DEPRECATED_FOR(function) +# define GADATASET_UNAVAILABLE(major, minor) +#else +# define GADATASET_DEPRECATED G_DEPRECATED +# define GADATASET_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GADATASET_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GADATASET_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile-time API version checks. + * + * This macro value must be one of the predefined version macros such + * as %GADATASET_VERSION_0_10. + * + * If you use any functions that are defined by a newer version than + * %GADATASET_VERSION_MIN_REQUIRED, deprecation warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-dataset-glib/arrow-dataset-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GADATASET_VERSION_MIN_REQUIRED +# define GADATASET_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GADATASET_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile-time API version checks. + * + * This macro value must be one of the predefined version macros such + * as %GADATASET_VERSION_0_10. + * + * If you use any functions that are defined by a newer version than + * %GADATASET_VERSION_MAX_ALLOWED, deprecation warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-dataset-glib/arrow-dataset-glib.h header.
+ * + * Since: 17.0.0 + */ +#ifndef GADATASET_VERSION_MAX_ALLOWED +# define GADATASET_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@VISIBILITY_MACROS@ + +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-flight-glib/arrow-flight-glib.h b/c_glib/arrow-flight-glib/arrow-flight-glib.h index 6fc8f43d8406e..7e973dd125dd4 100644 --- a/c_glib/arrow-flight-glib/arrow-flight-glib.h +++ b/c_glib/arrow-flight-glib/arrow-flight-glib.h @@ -19,6 +19,8 @@ #pragma once +#include + #include #include #include diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h index f67d58371d583..a91bbe55e3c04 100644 --- a/c_glib/arrow-flight-glib/client.h +++ b/c_glib/arrow-flight-glib/client.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GAFLIGHT_TYPE_STREAM_READER (gaflight_stream_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GAFlightStreamReader, gaflight_stream_reader, GAFLIGHT, @@ -35,6 +36,7 @@ struct _GAFlightStreamReaderClass }; #define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightCallOptions, gaflight_call_options, GAFLIGHT, CALL_OPTIONS, GObject) struct _GAFlightCallOptionsClass @@ -42,25 +44,26 @@ struct _GAFlightCallOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightCallOptions * gaflight_call_options_new(void); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 void gaflight_call_options_add_header(GAFlightCallOptions *options, const gchar *name, const gchar *value); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 void gaflight_call_options_clear_headers(GAFlightCallOptions *options); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 void gaflight_call_options_foreach_header(GAFlightCallOptions *options, GAFlightHeaderFunc func, gpointer user_data); #define GAFLIGHT_TYPE_CLIENT_OPTIONS (gaflight_client_options_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightClientOptions, gaflight_client_options, GAFLIGHT, CLIENT_OPTIONS, GObject) struct _GAFlightClientOptionsClass @@ -68,28 +71,29 @@ struct _GAFlightClientOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightClientOptions * gaflight_client_options_new(void); #define GAFLIGHT_TYPE_CLIENT (gaflight_client_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightClient, gaflight_client, GAFLIGHT, CLIENT, GObject) struct _GAFlightClientClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightClient * gaflight_client_new(GAFlightLocation *location, GAFlightClientOptions *options, GError **error); -GARROW_AVAILABLE_IN_8_0 +GAFLIGHT_AVAILABLE_IN_8_0 gboolean gaflight_client_close(GAFlightClient *client, GError **error); -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 gboolean gaflight_client_authenticate_basic_token(GAFlightClient *client, const gchar *user, @@ -99,21 +103,21 @@ gaflight_client_authenticate_basic_token(GAFlightClient *client, gchar **bearer_value, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_client_list_flights(GAFlightClient *client, GAFlightCriteria *criteria, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 GAFlightInfo * gaflight_client_get_flight_info(GAFlightClient *client, GAFlightDescriptor *descriptor, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightStreamReader * 
gaflight_client_do_get(GAFlightClient *client,
                       GAFlightTicket *ticket,
diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp
index 6d7bdcecf3006..185a28e6dc4bd 100644
--- a/c_glib/arrow-flight-glib/client.hpp
+++ b/c_glib/arrow-flight-glib/client.hpp
@@ -23,17 +23,23 @@

 #include

+GAFLIGHT_EXTERN
 GAFlightStreamReader *
 gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader,
                                gboolean is_owner);

+GAFLIGHT_EXTERN
 arrow::flight::FlightCallOptions *
 gaflight_call_options_get_raw(GAFlightCallOptions *options);

+GAFLIGHT_EXTERN
 arrow::flight::FlightClientOptions *
 gaflight_client_options_get_raw(GAFlightClientOptions *options);

+GAFLIGHT_EXTERN
 std::shared_ptr<arrow::flight::FlightClient>
 gaflight_client_get_raw(GAFlightClient *client);
+
+GAFLIGHT_EXTERN
 GAFlightClient *
 gaflight_client_new_raw(std::shared_ptr<arrow::flight::FlightClient> *flight_client);
diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h
index fcb23b1885ea7..b1d89f79c357e 100644
--- a/c_glib/arrow-flight-glib/common.h
+++ b/c_glib/arrow-flight-glib/common.h
@@ -21,6 +21,8 @@

 #include

+#include
+
 G_BEGIN_DECLS

 typedef void (*GAFlightHeaderFunc)(const gchar *name,
@@ -28,40 +30,43 @@ typedef void (*GAFlightHeaderFunc)(const gchar *name,
                                    gpointer user_data);

 #define GAFLIGHT_TYPE_CRITERIA (gaflight_criteria_get_type())
+GAFLIGHT_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GAFlightCriteria, gaflight_criteria, GAFLIGHT, CRITERIA, GObject)
 struct _GAFlightCriteriaClass
 {
   GObjectClass parent_class;
 };

-GARROW_AVAILABLE_IN_5_0
+GAFLIGHT_AVAILABLE_IN_5_0
 GAFlightCriteria *
 gaflight_criteria_new(GBytes *expression);

 #define GAFLIGHT_TYPE_LOCATION (gaflight_location_get_type())
+GAFLIGHT_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GAFlightLocation, gaflight_location, GAFLIGHT, LOCATION, GObject)
 struct _GAFlightLocationClass
 {
   GObjectClass parent_class;
 };

-GARROW_AVAILABLE_IN_5_0
+GAFLIGHT_AVAILABLE_IN_5_0
 GAFlightLocation *
 gaflight_location_new(const gchar *uri, GError **error);

-GARROW_AVAILABLE_IN_5_0
+GAFLIGHT_AVAILABLE_IN_5_0
 gchar *
 gaflight_location_to_string(GAFlightLocation *location);

-GARROW_AVAILABLE_IN_5_0
+GAFLIGHT_AVAILABLE_IN_5_0
 gchar *
 gaflight_location_get_scheme(GAFlightLocation *location);

-GARROW_AVAILABLE_IN_5_0
+GAFLIGHT_AVAILABLE_IN_5_0
 gboolean
 gaflight_location_equal(GAFlightLocation *location, GAFlightLocation *other_location);

 #define GAFLIGHT_TYPE_DESCRIPTOR (gaflight_descriptor_get_type())
+GAFLIGHT_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GAFlightDescriptor, gaflight_descriptor, GAFLIGHT, DESCRIPTOR, GObject)
 struct _GAFlightDescriptorClass
@@ -69,16 +74,17 @@ struct _GAFlightDescriptorClass
   GObjectClass parent_class;
 };

-GARROW_AVAILABLE_IN_5_0
+GAFLIGHT_AVAILABLE_IN_5_0
 gchar *
 gaflight_descriptor_to_string(GAFlightDescriptor *descriptor);

-GARROW_AVAILABLE_IN_5_0
+GAFLIGHT_AVAILABLE_IN_5_0
 gboolean
 gaflight_descriptor_equal(GAFlightDescriptor *descriptor,
                           GAFlightDescriptor *other_descriptor);

 #define GAFLIGHT_TYPE_PATH_DESCRIPTOR (gaflight_path_descriptor_get_type())
+GAFLIGHT_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GAFlightPathDescriptor,
                          gaflight_path_descriptor,
                          GAFLIGHT,
@@ -89,15 +95,16 @@ struct _GAFlightPathDescriptorClass
   GAFlightDescriptorClass parent_class;
 };

-GARROW_AVAILABLE_IN_5_0
+GAFLIGHT_AVAILABLE_IN_5_0
 GAFlightPathDescriptor *
 gaflight_path_descriptor_new(const gchar **paths, gsize n_paths);

-GARROW_AVAILABLE_IN_5_0
+GAFLIGHT_AVAILABLE_IN_5_0
 gchar **
 gaflight_path_descriptor_get_paths(GAFlightPathDescriptor *descriptor);

 #define
GAFLIGHT_TYPE_COMMAND_DESCRIPTOR (gaflight_command_descriptor_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightCommandDescriptor, gaflight_command_descriptor, GAFLIGHT, @@ -108,56 +115,59 @@ struct _GAFlightCommandDescriptorClass GAFlightDescriptorClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightCommandDescriptor * gaflight_command_descriptor_new(const gchar *command); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar * gaflight_command_descriptor_get_command(GAFlightCommandDescriptor *descriptor); #define GAFLIGHT_TYPE_TICKET (gaflight_ticket_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightTicket, gaflight_ticket, GAFLIGHT, TICKET, GObject) struct _GAFlightTicketClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightTicket * gaflight_ticket_new(GBytes *data); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_ticket_equal(GAFlightTicket *ticket, GAFlightTicket *other_ticket); #define GAFLIGHT_TYPE_ENDPOINT (gaflight_endpoint_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightEndpoint, gaflight_endpoint, GAFLIGHT, ENDPOINT, GObject) struct _GAFlightEndpointClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightEndpoint * gaflight_endpoint_new(GAFlightTicket *ticket, GList *locations); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_endpoint_equal(GAFlightEndpoint *endpoint, GAFlightEndpoint *other_endpoint); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_endpoint_get_locations(GAFlightEndpoint *endpoint); #define GAFLIGHT_TYPE_INFO (gaflight_info_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightInfo, gaflight_info, GAFLIGHT, INFO, GObject) struct _GAFlightInfoClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightInfo * gaflight_info_new(GArrowSchema *schema, GAFlightDescriptor *descriptor, @@ -166,27 +176,28 @@ gaflight_info_new(GArrowSchema *schema, gint64 total_bytes, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_info_equal(GAFlightInfo *info, GAFlightInfo *other_info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GArrowSchema * gaflight_info_get_schema(GAFlightInfo *info, GArrowReadOptions *options, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightDescriptor * gaflight_info_get_descriptor(GAFlightInfo *info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_info_get_endpoints(GAFlightInfo *info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gint64 gaflight_info_get_total_records(GAFlightInfo *info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gint64 gaflight_info_get_total_bytes(GAFlightInfo *info); #define GAFLIGHT_TYPE_STREAM_CHUNK (gaflight_stream_chunk_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GAFlightStreamChunk, gaflight_stream_chunk, GAFLIGHT, STREAM_CHUNK, GObject) struct _GAFlightStreamChunkClass @@ -194,14 +205,15 @@ struct _GAFlightStreamChunkClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GArrowRecordBatch * gaflight_stream_chunk_get_data(GAFlightStreamChunk *chunk); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GArrowBuffer * gaflight_stream_chunk_get_metadata(GAFlightStreamChunk *chunk); #define GAFLIGHT_TYPE_RECORD_BATCH_READER (gaflight_record_batch_reader_get_type()) 
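The switch from GARROW_AVAILABLE_IN_* to GAFLIGHT_AVAILABLE_IN_* annotations above is what lets a consumer opt into compile-time API range checking. A minimal consumer-side sketch in C, assuming the generated arrow-flight-glib/version.h defines encoded version macros such as GAFLIGHT_VERSION_5_0 and GAFLIGHT_VERSION_6_0 via the @ENCODED_VERSIONS@ substitution (those exact macro names are an assumption, not shown in this diff):

/* Pin the targeted Flight GLib API range before the first include.
 * The macros are expanded lazily, so referencing GAFLIGHT_VERSION_6_0
 * here, before version.h has been included, is fine. */
#define GAFLIGHT_VERSION_MIN_REQUIRED GAFLIGHT_VERSION_6_0
#define GAFLIGHT_VERSION_MAX_ALLOWED GAFLIGHT_VERSION_6_0

#include <arrow-flight-glib/arrow-flight-glib.h>

int
main(void)
{
#if GAFLIGHT_VERSION_CHECK(6, 0, 0)
  /* Functions annotated GAFLIGHT_AVAILABLE_IN_5_0 or
   * GAFLIGHT_AVAILABLE_IN_6_0, such as gaflight_info_get_total_records(),
   * compile without availability warnings under this pinning; anything
   * newer (e.g. GAFLIGHT_AVAILABLE_IN_9_0) is expected to warn. */
#endif
  return 0;
}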
+GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchReader, gaflight_record_batch_reader, GAFLIGHT, @@ -212,11 +224,11 @@ struct _GAFlightRecordBatchReaderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightStreamChunk * gaflight_record_batch_reader_read_next(GAFlightRecordBatchReader *reader, GError **error); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GArrowTable * gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError **error); diff --git a/c_glib/arrow-flight-glib/common.hpp b/c_glib/arrow-flight-glib/common.hpp index b748d6f382184..db56fff579baf 100644 --- a/c_glib/arrow-flight-glib/common.hpp +++ b/c_glib/arrow-flight-glib/common.hpp @@ -23,39 +23,59 @@ #include +GAFLIGHT_EXTERN GAFlightCriteria * gaflight_criteria_new_raw(const arrow::flight::Criteria *flight_criteria); + +GAFLIGHT_EXTERN arrow::flight::Criteria * gaflight_criteria_get_raw(GAFlightCriteria *criteria); +GAFLIGHT_EXTERN arrow::flight::Location * gaflight_location_get_raw(GAFlightLocation *location); +GAFLIGHT_EXTERN GAFlightDescriptor * gaflight_descriptor_new_raw(const arrow::flight::FlightDescriptor *flight_descriptor); + +GAFLIGHT_EXTERN arrow::flight::FlightDescriptor * gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor); +GAFLIGHT_EXTERN GAFlightTicket * gaflight_ticket_new_raw(const arrow::flight::Ticket *flight_ticket); + +GAFLIGHT_EXTERN arrow::flight::Ticket * gaflight_ticket_get_raw(GAFlightTicket *ticket); +GAFLIGHT_EXTERN GAFlightEndpoint * gaflight_endpoint_new_raw(const arrow::flight::FlightEndpoint *flight_endpoint, GAFlightTicket *ticket); + +GAFLIGHT_EXTERN arrow::flight::FlightEndpoint * gaflight_endpoint_get_raw(GAFlightEndpoint *endpoint); +GAFLIGHT_EXTERN GAFlightInfo * gaflight_info_new_raw(arrow::flight::FlightInfo *flight_info); + +GAFLIGHT_EXTERN arrow::flight::FlightInfo * gaflight_info_get_raw(GAFlightInfo *info); +GAFLIGHT_EXTERN GAFlightStreamChunk * gaflight_stream_chunk_new_raw(arrow::flight::FlightStreamChunk *flight_chunk); + +GAFLIGHT_EXTERN arrow::flight::FlightStreamChunk * gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk); +GAFLIGHT_EXTERN arrow::flight::MetadataRecordBatchReader * gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader); diff --git a/c_glib/arrow-flight-glib/meson.build b/c_glib/arrow-flight-glib/meson.build index 70db7400b124a..c1422e0d10a7d 100644 --- a/c_glib/arrow-flight-glib/meson.build +++ b/c_glib/arrow-flight-glib/meson.build @@ -37,6 +37,14 @@ cpp_headers = files( 'server.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GAFLIGHT', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-flight-glib') @@ -50,6 +58,7 @@ libarrow_flight_glib = library('arrow-flight-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGAFLIGHT_COMPILATION'], soversion: so_version, version: library_version) arrow_flight_glib = declare_dependency(link_with: libarrow_flight_glib, diff --git a/c_glib/arrow-flight-glib/server.h b/c_glib/arrow-flight-glib/server.h index 89f5a0a596e9e..7e594febb172f 100644 --- a/c_glib/arrow-flight-glib/server.h +++ b/c_glib/arrow-flight-glib/server.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GAFLIGHT_TYPE_DATA_STREAM 
(gaflight_data_stream_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GAFlightDataStream, gaflight_data_stream, GAFLIGHT, DATA_STREAM, GObject) struct _GAFlightDataStreamClass @@ -32,6 +33,7 @@ struct _GAFlightDataStreamClass }; #define GAFLIGHT_TYPE_RECORD_BATCH_STREAM (gaflight_record_batch_stream_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchStream, gaflight_record_batch_stream, GAFLIGHT, @@ -42,12 +44,13 @@ struct _GAFlightRecordBatchStreamClass GAFlightDataStreamClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightRecordBatchStream * gaflight_record_batch_stream_new(GArrowRecordBatchReader *reader, GArrowWriteOptions *options); #define GAFLIGHT_TYPE_MESSAGE_READER (gaflight_message_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightMessageReader, gaflight_message_reader, GAFLIGHT, @@ -58,11 +61,12 @@ struct _GAFlightMessageReaderClass GAFlightRecordBatchReaderClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHT_AVAILABLE_IN_14_0 GAFlightDescriptor * gaflight_message_reader_get_descriptor(GAFlightMessageReader *reader); #define GAFLIGHT_TYPE_SERVER_CALL_CONTEXT (gaflight_server_call_context_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerCallContext, gaflight_server_call_context, GAFLIGHT, @@ -73,13 +77,14 @@ struct _GAFlightServerCallContextClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHT_AVAILABLE_IN_14_0 void gaflight_server_call_context_foreach_incoming_header(GAFlightServerCallContext *context, GAFlightHeaderFunc func, gpointer user_data); #define GAFLIGHT_TYPE_SERVER_AUTH_SENDER (gaflight_server_auth_sender_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerAuthSender, gaflight_server_auth_sender, GAFLIGHT, @@ -90,13 +95,14 @@ struct _GAFlightServerAuthSenderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 gboolean gaflight_server_auth_sender_write(GAFlightServerAuthSender *sender, GBytes *message, GError **error); #define GAFLIGHT_TYPE_SERVER_AUTH_READER (gaflight_server_auth_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerAuthReader, gaflight_server_auth_reader, GAFLIGHT, @@ -107,11 +113,12 @@ struct _GAFlightServerAuthReaderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 GBytes * gaflight_server_auth_reader_read(GAFlightServerAuthReader *reader, GError **error); #define GAFLIGHT_TYPE_SERVER_AUTH_HANDLER (gaflight_server_auth_handler_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerAuthHandler, gaflight_server_auth_handler, GAFLIGHT, @@ -124,6 +131,7 @@ struct _GAFlightServerAuthHandlerClass #define GAFLIGHT_TYPE_SERVER_CUSTOM_AUTH_HANDLER \ (gaflight_server_custom_auth_handler_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerCustomAuthHandler, gaflight_server_custom_auth_handler, GAFLIGHT, @@ -152,7 +160,7 @@ struct _GAFlightServerCustomAuthHandlerClass GError **error); }; -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 void gaflight_server_custom_auth_handler_authenticate(GAFlightServerCustomAuthHandler *handler, GAFlightServerCallContext *context, @@ -160,7 +168,7 @@ gaflight_server_custom_auth_handler_authenticate(GAFlightServerCustomAuthHandler GAFlightServerAuthReader *reader, GError **error); -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 GBytes * 
gaflight_server_custom_auth_handler_is_valid(GAFlightServerCustomAuthHandler *handler, GAFlightServerCallContext *context, @@ -168,6 +176,7 @@ gaflight_server_custom_auth_handler_is_valid(GAFlightServerCustomAuthHandler *ha GError **error); #define GAFLIGHT_TYPE_SERVER_OPTIONS (gaflight_server_options_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightServerOptions, gaflight_server_options, GAFLIGHT, SERVER_OPTIONS, GObject) struct _GAFlightServerOptionsClass @@ -175,14 +184,16 @@ struct _GAFlightServerOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightServerOptions * gaflight_server_options_new(GAFlightLocation *location); #define GAFLIGHT_TYPE_SERVABLE (gaflight_servable_get_type()) +GAFLIGHT_AVAILABLE_IN_9_0 G_DECLARE_INTERFACE(GAFlightServable, gaflight_servable, GAFLIGHT, SERVABLE, GObject) #define GAFLIGHT_TYPE_SERVER (gaflight_server_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServer, gaflight_server, GAFLIGHT, SERVER, GObject) /** * GAFlightServerClass: @@ -209,34 +220,34 @@ struct _GAFlightServerClass GError **error); }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_server_listen(GAFlightServer *server, GAFlightServerOptions *options, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gint gaflight_server_get_port(GAFlightServer *server); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_server_shutdown(GAFlightServer *server, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_server_wait(GAFlightServer *server, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_server_list_flights(GAFlightServer *server, GAFlightServerCallContext *context, GAFlightCriteria *criteria, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 GAFlightInfo * gaflight_server_get_flight_info(GAFlightServer *server, GAFlightServerCallContext *context, GAFlightDescriptor *request, GError **error); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightDataStream * gaflight_server_do_get(GAFlightServer *server, GAFlightServerCallContext *context, diff --git a/c_glib/arrow-flight-glib/server.hpp b/c_glib/arrow-flight-glib/server.hpp index 70da867d5b0e9..ec4815751c8d8 100644 --- a/c_glib/arrow-flight-glib/server.hpp +++ b/c_glib/arrow-flight-glib/server.hpp @@ -23,34 +23,49 @@ #include +GAFLIGHT_EXTERN arrow::flight::FlightDataStream * gaflight_data_stream_get_raw(GAFlightDataStream *stream); +GAFLIGHT_EXTERN GAFlightMessageReader * gaflight_message_reader_new_raw(arrow::flight::FlightMessageReader *flight_reader, gboolean is_owner); + +GAFLIGHT_EXTERN arrow::flight::FlightMessageReader * gaflight_message_reader_get_raw(GAFlightMessageReader *reader); +GAFLIGHT_EXTERN GAFlightServerCallContext * gaflight_server_call_context_new_raw( const arrow::flight::ServerCallContext *flight_call_context); + +GAFLIGHT_EXTERN const arrow::flight::ServerCallContext * gaflight_server_call_context_get_raw(GAFlightServerCallContext *call_context); +GAFLIGHT_EXTERN GAFlightServerAuthSender * gaflight_server_auth_sender_new_raw(arrow::flight::ServerAuthSender *flight_sender); + +GAFLIGHT_EXTERN arrow::flight::ServerAuthSender * gaflight_server_auth_sender_get_raw(GAFlightServerAuthSender *sender); +GAFLIGHT_EXTERN GAFlightServerAuthReader * gaflight_server_auth_reader_new_raw(arrow::flight::ServerAuthReader *flight_reader); + +GAFLIGHT_EXTERN arrow::flight::ServerAuthReader * 
gaflight_server_auth_reader_get_raw(GAFlightServerAuthReader *reader);

+GAFLIGHT_EXTERN
 std::shared_ptr<arrow::flight::ServerAuthHandler>
 gaflight_server_auth_handler_get_raw(GAFlightServerAuthHandler *handler);

+GAFLIGHT_EXTERN
 arrow::flight::FlightServerOptions *
 gaflight_server_options_get_raw(GAFlightServerOptions *options);

@@ -61,6 +76,7 @@ struct _GAFlightServableInterface
   arrow::flight::FlightServerBase *(*get_raw)(GAFlightServable *servable);
 };

+GAFLIGHT_EXTERN
 arrow::flight::FlightServerBase *
 gaflight_servable_get_raw(GAFlightServable *servable);

diff --git a/c_glib/arrow-flight-glib/version.h.in b/c_glib/arrow-flight-glib/version.h.in
new file mode 100644
index 0000000000000..4a42c7f5aa91e
--- /dev/null
+++ b/c_glib/arrow-flight-glib/version.h.in
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/version.h>
+
+/**
+ * SECTION: version
+ * @section_id: version-macros
+ * @title: Version related macros
+ * @include: arrow-flight-glib/arrow-flight-glib.h
+ *
+ * Apache Arrow Flight GLib provides macros that can be used by the C
+ * preprocessor. They are useful to check version related things at
+ * compile time.
+ */
+
+/**
+ * GAFLIGHT_VERSION_MAJOR:
+ *
+ * The major version.
+ *
+ * Since: 17.0.0
+ */
+#define GAFLIGHT_VERSION_MAJOR (@VERSION_MAJOR@)
+
+/**
+ * GAFLIGHT_VERSION_MINOR:
+ *
+ * The minor version.
+ *
+ * Since: 17.0.0
+ */
+#define GAFLIGHT_VERSION_MINOR (@VERSION_MINOR@)
+
+/**
+ * GAFLIGHT_VERSION_MICRO:
+ *
+ * The micro version.
+ *
+ * Since: 17.0.0
+ */
+#define GAFLIGHT_VERSION_MICRO (@VERSION_MICRO@)
+
+/**
+ * GAFLIGHT_VERSION_TAG:
+ *
+ * The version tag. Normally, it's an empty string. It's "SNAPSHOT"
+ * for snapshot versions.
+ *
+ * Since: 17.0.0
+ */
+#define GAFLIGHT_VERSION_TAG "@VERSION_TAG@"
+
+/**
+ * GAFLIGHT_VERSION_CHECK:
+ * @major: A major version to check for.
+ * @minor: A minor version to check for.
+ * @micro: A micro version to check for.
+ *
+ * You can use this macro in the C preprocessor.
+ *
+ * Returns: %TRUE if the compile time Apache Arrow Flight GLib version
+ * is the same as or newer than the passed version, %FALSE otherwise.
+ *
+ * Since: 17.0.0
+ */
+#define GAFLIGHT_VERSION_CHECK(major, minor, micro) \
+  (GAFLIGHT_VERSION_MAJOR > (major) || \
+   (GAFLIGHT_VERSION_MAJOR == (major) && \
+    GAFLIGHT_VERSION_MINOR > (minor)) || \
+   (GAFLIGHT_VERSION_MAJOR == (major) && \
+    GAFLIGHT_VERSION_MINOR == (minor) && \
+    GAFLIGHT_VERSION_MICRO >= (micro)))
+
+/**
+ * GAFLIGHT_DISABLE_DEPRECATION_WARNINGS:
+ *
+ * If this macro is defined, no deprecation warnings are produced.
+ *
+ * You must define this macro before including the
+ * arrow-flight-glib/arrow-flight-glib.h header.
+ *
+ * Since: 17.0.0
+ */
+
+#ifdef GAFLIGHT_DISABLE_DEPRECATION_WARNINGS
+# define GAFLIGHT_DEPRECATED
+# define GAFLIGHT_DEPRECATED_FOR(function)
+# define GAFLIGHT_UNAVAILABLE(major, minor)
+#else
+# define GAFLIGHT_DEPRECATED G_DEPRECATED
+# define GAFLIGHT_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function)
+# define GAFLIGHT_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor)
+#endif
+
+@ENCODED_VERSIONS@
+
+/**
+ * GAFLIGHT_VERSION_MIN_REQUIRED:
+ *
+ * You can use this macro for compile time API version check.
+ *
+ * This macro value must be one of the predefined version macros such
+ * as %GAFLIGHT_VERSION_5_0.
+ *
+ * If you use any functions that were deprecated in or before
+ * %GAFLIGHT_VERSION_MIN_REQUIRED, deprecation warnings are produced
+ * at compile time.
+ *
+ * You must define this macro before including the
+ * arrow-flight-glib/arrow-flight-glib.h header.
+ *
+ * Since: 17.0.0
+ */
+#ifndef GAFLIGHT_VERSION_MIN_REQUIRED
+# define GAFLIGHT_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED
+#endif
+
+/**
+ * GAFLIGHT_VERSION_MAX_ALLOWED:
+ *
+ * You can use this macro for compile time API version check.
+ *
+ * This macro value must be one of the predefined version macros such
+ * as %GAFLIGHT_VERSION_5_0.
+ *
+ * If you use any functions that are defined by a newer version than
+ * %GAFLIGHT_VERSION_MAX_ALLOWED, warnings are produced at
+ * compile time.
+ *
+ * You must define this macro before including the
+ * arrow-flight-glib/arrow-flight-glib.h header.
+ *
+ * Since: 17.0.0
+ */
+#ifndef GAFLIGHT_VERSION_MAX_ALLOWED
+# define GAFLIGHT_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED
+#endif
+
+@VISIBILITY_MACROS@
+
+@AVAILABILITY_MACROS@
diff --git a/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h b/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h
index 8ebe39aee57a8..94e72d06f2b47 100644
--- a/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h
+++ b/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h
@@ -19,5 +19,7 @@

 #pragma once

+#include
+
 #include
 #include
diff --git a/c_glib/arrow-flight-sql-glib/client.h b/c_glib/arrow-flight-sql-glib/client.h
index 9a5a8987f7195..b9e9baf41a59f 100644
--- a/c_glib/arrow-flight-sql-glib/client.h
+++ b/c_glib/arrow-flight-sql-glib/client.h
@@ -21,9 +21,12 @@

 #include

+#include
+
 G_BEGIN_DECLS

 #define GAFLIGHTSQL_TYPE_PREPARED_STATEMENT (gaflightsql_prepared_statement_get_type())
+GAFLIGHTSQL_AVAILABLE_IN_14_0
 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLPreparedStatement,
                          gaflightsql_prepared_statement,
                          GAFLIGHTSQL,
@@ -34,52 +37,53 @@ struct _GAFlightSQLPreparedStatementClass
 {
   GObjectClass parent_class;
 };

-GARROW_AVAILABLE_IN_14_0
+GAFLIGHTSQL_AVAILABLE_IN_14_0
 GAFlightInfo *
 gaflightsql_prepared_statement_execute(GAFlightSQLPreparedStatement *statement,
                                        GAFlightCallOptions *options,
                                        GError **error);

-GARROW_AVAILABLE_IN_14_0
+GAFLIGHTSQL_AVAILABLE_IN_14_0
 gint64
 gaflightsql_prepared_statement_execute_update(GAFlightSQLPreparedStatement *statement,
                                               GAFlightCallOptions *options,
                                               GError **error);

-GARROW_AVAILABLE_IN_14_0
+GAFLIGHTSQL_AVAILABLE_IN_14_0
 GArrowSchema *
 gaflightsql_prepared_statement_get_parameter_schema(
   GAFlightSQLPreparedStatement *statement);

-GARROW_AVAILABLE_IN_14_0
+GAFLIGHTSQL_AVAILABLE_IN_14_0
 GArrowSchema *
 gaflightsql_prepared_statement_get_dataset_schema(
   GAFlightSQLPreparedStatement *statement);

-GARROW_AVAILABLE_IN_14_0
+GAFLIGHTSQL_AVAILABLE_IN_14_0
 gboolean
 gaflightsql_prepared_statement_set_record_batch(GAFlightSQLPreparedStatement *statement,
                                                 GArrowRecordBatch *record_batch,
GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_set_record_batch_reader( GAFlightSQLPreparedStatement *statement, GArrowRecordBatchReader *reader, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_close(GAFlightSQLPreparedStatement *statement, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_is_closed(GAFlightSQLPreparedStatement *statement); #define GAFLIGHTSQL_TYPE_CLIENT (gaflightsql_client_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE( GAFlightSQLClient, gaflightsql_client, GAFLIGHTSQL, CLIENT, GObject) struct _GAFlightSQLClientClass @@ -87,32 +91,32 @@ struct _GAFlightSQLClientClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightSQLClient * gaflightsql_client_new(GAFlightClient *client); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightInfo * gaflightsql_client_execute(GAFlightSQLClient *client, const gchar *query, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_13_0 +GAFLIGHTSQL_AVAILABLE_IN_13_0 gint64 gaflightsql_client_execute_update(GAFlightSQLClient *client, const gchar *query, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightStreamReader * gaflightsql_client_do_get(GAFlightSQLClient *client, GAFlightTicket *ticket, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightSQLPreparedStatement * gaflightsql_client_prepare(GAFlightSQLClient *client, const gchar *query, diff --git a/c_glib/arrow-flight-sql-glib/meson.build b/c_glib/arrow-flight-sql-glib/meson.build index e7abc605bb819..d588ba4917c76 100644 --- a/c_glib/arrow-flight-sql-glib/meson.build +++ b/c_glib/arrow-flight-sql-glib/meson.build @@ -34,6 +34,14 @@ cpp_headers = files( 'server.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GAFLIGHTSQL', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-flight-sql-glib') @@ -47,6 +55,7 @@ libarrow_flight_sql_glib = library('arrow-flight-sql-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGAFLIGHTSQL_COMPILATION'], soversion: so_version, version: library_version) arrow_flight_sql_glib = \ diff --git a/c_glib/arrow-flight-sql-glib/server.h b/c_glib/arrow-flight-sql-glib/server.h index d6fd7e4d10394..8cf0aace77644 100644 --- a/c_glib/arrow-flight-sql-glib/server.h +++ b/c_glib/arrow-flight-sql-glib/server.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GAFLIGHTSQL_TYPE_COMMAND (gaflightsql_command_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE( GAFlightSQLCommand, gaflightsql_command, GAFLIGHTSQL, COMMAND, GObject) struct _GAFlightSQLCommandClass @@ -32,6 +35,7 @@ struct _GAFlightSQLCommandClass }; #define GAFLIGHTSQL_TYPE_STATEMENT_QUERY (gaflightsql_statement_query_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLStatementQuery, gaflightsql_statement_query, GAFLIGHTSQL, @@ -42,11 +46,12 @@ struct _GAFlightSQLStatementQueryClass GAFlightSQLCommandClass parent_class; }; 
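Nothing in this patch shows what the @AVAILABILITY_MACROS@ substitution expands to, so the following is a hypothetical C-preprocessor sketch, modeled on GLib's GLIB_AVAILABLE_IN_* scheme rather than taken from this diff, of how a generated GAFLIGHTSQL_AVAILABLE_IN_14_0 could be wired to the GAFLIGHTSQL_UNAVAILABLE() macro defined in the new version.h.in below; GAFLIGHTSQL_EXTERN is assumed here by analogy with the GAFLIGHT_EXTERN used in the .hpp headers above:

/* Hypothetical expansion, for illustration only (not from this diff):
 * when the consumer's GAFLIGHTSQL_VERSION_MAX_ALLOWED is older than
 * 14.0, the annotation adds G_UNAVAILABLE(14, 0) so that calls warn at
 * compile time; otherwise it only carries symbol visibility. */
#if GAFLIGHTSQL_VERSION_MAX_ALLOWED < GAFLIGHTSQL_VERSION_14_0
#  define GAFLIGHTSQL_AVAILABLE_IN_14_0 \
    GAFLIGHTSQL_UNAVAILABLE(14, 0) GAFLIGHTSQL_EXTERN
#else
#  define GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFLIGHTSQL_EXTERN
#endif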
-GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 const gchar * gaflightsql_statement_query_get_query(GAFlightSQLStatementQuery *command); #define GAFLIGHTSQL_TYPE_STATEMENT_UPDATE (gaflightsql_statement_update_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_13_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLStatementUpdate, gaflightsql_statement_update, GAFLIGHTSQL, @@ -57,12 +62,13 @@ struct _GAFlightSQLStatementUpdateClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_13_0 +GAFLIGHTSQL_AVAILABLE_IN_13_0 const gchar * gaflightsql_statement_update_get_query(GAFlightSQLStatementUpdate *command); #define GAFLIGHTSQL_TYPE_PREPARED_STATEMENT_UPDATE \ (gaflightsql_prepared_statement_update_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLPreparedStatementUpdate, gaflightsql_prepared_statement_update, GAFLIGHTSQL, @@ -73,13 +79,14 @@ struct _GAFlightSQLPreparedStatementUpdateClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GBytes * gaflightsql_prepared_statement_update_get_handle( GAFlightSQLPreparedStatementUpdate *command); #define GAFLIGHTSQL_TYPE_STATEMENT_QUERY_TICKET \ (gaflightsql_statement_query_ticket_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLStatementQueryTicket, gaflightsql_statement_query_ticket, GAFLIGHTSQL, @@ -90,15 +97,16 @@ struct _GAFlightSQLStatementQueryTicketClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GBytes * gaflightsql_statement_query_ticket_generate_handle(const gchar *query, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GBytes * gaflightsql_statement_query_ticket_get_handle(GAFlightSQLStatementQueryTicket *command); #define GAFLIGHTSQL_TYPE_CREATE_PREPARED_STATEMENT_REQUEST \ (gaflightsql_create_prepared_statement_request_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLCreatePreparedStatementRequest, gaflightsql_create_prepared_statement_request, GAFLIGHTSQL, @@ -109,18 +117,19 @@ struct _GAFlightSQLCreatePreparedStatementRequestClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 const gchar * gaflightsql_create_prepared_statement_request_get_query( GAFlightSQLCreatePreparedStatementRequest *request); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 const gchar * gaflightsql_create_prepared_statement_request_get_transaction_id( GAFlightSQLCreatePreparedStatementRequest *request); #define GAFLIGHTSQL_TYPE_CREATE_PREPARED_STATEMENT_RESULT \ (gaflightsql_create_prepared_statement_result_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLCreatePreparedStatementResult, gaflightsql_create_prepared_statement_result, GAFLIGHTSQL, @@ -131,36 +140,37 @@ struct _GAFlightSQLCreatePreparedStatementResultClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightSQLCreatePreparedStatementResult * gaflightsql_create_prepared_statement_result_new(void); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_create_prepared_statement_result_set_dataset_schema( GAFlightSQLCreatePreparedStatementResult *result, GArrowSchema *schema); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GArrowSchema * gaflightsql_create_prepared_statement_result_get_dataset_schema( GAFlightSQLCreatePreparedStatementResult *result); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 void 
gaflightsql_create_prepared_statement_result_set_parameter_schema( GAFlightSQLCreatePreparedStatementResult *result, GArrowSchema *schema); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GArrowSchema * gaflightsql_create_prepared_statement_result_get_parameter_schema( GAFlightSQLCreatePreparedStatementResult *result); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_create_prepared_statement_result_set_handle( GAFlightSQLCreatePreparedStatementResult *result, GBytes *handle); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GBytes * gaflightsql_create_prepared_statement_result_get_handle( GAFlightSQLCreatePreparedStatementResult *result); #define GAFLIGHTSQL_TYPE_CLOSE_PREPARED_STATEMENT_REQUEST \ (gaflightsql_close_prepared_statement_request_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLClosePreparedStatementRequest, gaflightsql_close_prepared_statement_request, GAFLIGHTSQL, @@ -171,12 +181,13 @@ struct _GAFlightSQLClosePreparedStatementRequestClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GBytes * gaflightsql_close_prepared_statement_request_get_handle( GAFlightSQLClosePreparedStatementRequest *request); #define GAFLIGHTSQL_TYPE_SERVER (gaflightsql_server_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE( GAFlightSQLServer, gaflightsql_server, GAFLIGHTSQL, SERVER, GAFlightServer) /** @@ -231,27 +242,27 @@ struct _GAFlightSQLServerClass GError **error); }; -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightInfo * gaflightsql_server_get_flight_info_statement(GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLStatementQuery *command, GAFlightDescriptor *descriptor, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightDataStream * gaflightsql_server_do_get_statement(GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLStatementQueryTicket *ticket, GError **error); -GARROW_AVAILABLE_IN_13_0 +GAFLIGHTSQL_AVAILABLE_IN_13_0 gint64 gaflightsql_server_do_put_command_statement_update(GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLStatementUpdate *command, GError **error); -/* We can restore this after we bump version to 14.0.0-SNAPSHOT. */ -/* GARROW_AVAILABLE_IN_14_0 */ + +GAFLIGHTSQL_AVAILABLE_IN_14_0 gint64 gaflightsql_server_do_put_prepared_statement_update( GAFlightSQLServer *server, @@ -259,16 +270,16 @@ gaflightsql_server_do_put_prepared_statement_update( GAFlightSQLPreparedStatementUpdate *command, GAFlightMessageReader *reader, GError **error); -/* We can restore this after we bump version to 14.0.0-SNAPSHOT. */ -/* GARROW_AVAILABLE_IN_14_0 */ + +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightSQLCreatePreparedStatementResult * gaflightsql_server_create_prepared_statement( GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLCreatePreparedStatementRequest *request, GError **error); -/* We can restore this after we bump version to 14.0.0-SNAPSHOT. */ -/* GARROW_AVAILABLE_IN_14_0 */ + +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_server_close_prepared_statement( GAFlightSQLServer *server, diff --git a/c_glib/arrow-flight-sql-glib/version.h.in b/c_glib/arrow-flight-sql-glib/version.h.in new file mode 100644 index 0000000000000..e4373109b9008 --- /dev/null +++ b/c_glib/arrow-flight-sql-glib/version.h.in @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/version.h>
+
+/**
+ * SECTION: version
+ * @section_id: version-macros
+ * @title: Version related macros
+ * @include: arrow-flight-sql-glib/arrow-flight-sql-glib.h
+ *
+ * Apache Arrow Flight SQL GLib provides macros that can be used by
+ * the C preprocessor. They are useful to check version related things
+ * at compile time.
+ */
+
+/**
+ * GAFLIGHTSQL_VERSION_MAJOR:
+ *
+ * The major version.
+ *
+ * Since: 17.0.0
+ */
+#define GAFLIGHTSQL_VERSION_MAJOR (@VERSION_MAJOR@)
+
+/**
+ * GAFLIGHTSQL_VERSION_MINOR:
+ *
+ * The minor version.
+ *
+ * Since: 17.0.0
+ */
+#define GAFLIGHTSQL_VERSION_MINOR (@VERSION_MINOR@)
+
+/**
+ * GAFLIGHTSQL_VERSION_MICRO:
+ *
+ * The micro version.
+ *
+ * Since: 17.0.0
+ */
+#define GAFLIGHTSQL_VERSION_MICRO (@VERSION_MICRO@)
+
+/**
+ * GAFLIGHTSQL_VERSION_TAG:
+ *
+ * The version tag. Normally, it's an empty string. It's "SNAPSHOT"
+ * for snapshot versions.
+ *
+ * Since: 17.0.0
+ */
+#define GAFLIGHTSQL_VERSION_TAG "@VERSION_TAG@"
+
+/**
+ * GAFLIGHTSQL_VERSION_CHECK:
+ * @major: A major version to check for.
+ * @minor: A minor version to check for.
+ * @micro: A micro version to check for.
+ *
+ * You can use this macro in the C preprocessor.
+ *
+ * Returns: %TRUE if the compile time Apache Arrow Flight SQL GLib
+ * version is the same as or newer than the passed version, %FALSE
+ * otherwise.
+ *
+ * Since: 17.0.0
+ */
+#define GAFLIGHTSQL_VERSION_CHECK(major, minor, micro) \
+  (GAFLIGHTSQL_VERSION_MAJOR > (major) || \
+   (GAFLIGHTSQL_VERSION_MAJOR == (major) && \
+    GAFLIGHTSQL_VERSION_MINOR > (minor)) || \
+   (GAFLIGHTSQL_VERSION_MAJOR == (major) && \
+    GAFLIGHTSQL_VERSION_MINOR == (minor) && \
+    GAFLIGHTSQL_VERSION_MICRO >= (micro)))
+
+/**
+ * GAFLIGHTSQL_DISABLE_DEPRECATION_WARNINGS:
+ *
+ * If this macro is defined, no deprecation warnings are produced.
+ *
+ * You must define this macro before including the
+ * arrow-flight-sql-glib/arrow-flight-sql-glib.h header.
+ *
+ * Since: 17.0.0
+ */
+
+#ifdef GAFLIGHTSQL_DISABLE_DEPRECATION_WARNINGS
+# define GAFLIGHTSQL_DEPRECATED
+# define GAFLIGHTSQL_DEPRECATED_FOR(function)
+# define GAFLIGHTSQL_UNAVAILABLE(major, minor)
+#else
+# define GAFLIGHTSQL_DEPRECATED G_DEPRECATED
+# define GAFLIGHTSQL_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function)
+# define GAFLIGHTSQL_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor)
+#endif
+
+@ENCODED_VERSIONS@
+
+/**
+ * GAFLIGHTSQL_VERSION_MIN_REQUIRED:
+ *
+ * You can use this macro for compile time API version check.
+ *
+ * This macro value must be one of the predefined version macros such
+ * as %GAFLIGHTSQL_VERSION_9_0.
+ *
+ * If you use any functions that were deprecated in or before
+ * %GAFLIGHTSQL_VERSION_MIN_REQUIRED, deprecation warnings are produced
+ * at compile time.
+ *
+ * You must define this macro before including the
+ * arrow-flight-sql-glib/arrow-flight-sql-glib.h header.
+ *
+ * Since: 17.0.0
+ */
+#ifndef GAFLIGHTSQL_VERSION_MIN_REQUIRED
+# define GAFLIGHTSQL_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED
+#endif
+
+/**
+ * GAFLIGHTSQL_VERSION_MAX_ALLOWED:
+ *
+ * You can use this macro for compile time API version check.
+ *
+ * This macro value must be one of the predefined version macros such
+ * as %GAFLIGHTSQL_VERSION_9_0.
+ *
+ * If you use any functions that are defined by a newer version than
+ * %GAFLIGHTSQL_VERSION_MAX_ALLOWED, warnings are produced at
+ * compile time.
+ *
+ * You must define this macro before including the
+ * arrow-flight-sql-glib/arrow-flight-sql-glib.h header.
+ *
+ * Since: 17.0.0
+ */
+#ifndef GAFLIGHTSQL_VERSION_MAX_ALLOWED
+# define GAFLIGHTSQL_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED
+#endif
+
+@VISIBILITY_MACROS@
+
+@AVAILABILITY_MACROS@
diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp
index b498ecb51cedb..9b7c608ca8a5b 100644
--- a/c_glib/arrow-glib/array-builder.cpp
+++ b/c_glib/arrow-glib/array-builder.cpp
@@ -231,8 +231,8 @@ garrow_array_builder_append_values(GArrowArrayBuilder *builder,
   if (n_remains > 0) {
     ++n_loops;
   }
+  std::vector<uint8_t> data(value_size * chunk_size);
   for (gint64 i = 0; i < n_loops; ++i) {
-    uint8_t data[value_size * chunk_size];
     uint8_t *valid_bytes = nullptr;
     uint8_t valid_bytes_buffer[chunk_size];
     if (is_valids_length > 0) {
@@ -255,7 +255,7 @@ garrow_array_builder_append_values(GArrowArrayBuilder *builder,
         value = values[offset + j];
       }
       if (value) {
-        get_value_function(data + (value_size * j), value, value_size);
+        get_value_function(data.data() + (value_size * j), value, value_size);
       } else {
         is_valid = false;
         if (!valid_bytes) {
@@ -267,7 +267,7 @@ garrow_array_builder_append_values(GArrowArrayBuilder *builder,
       valid_bytes_buffer[j] = is_valid;
     }
   }
-  auto status = arrow_builder->AppendValues(data, n_values, valid_bytes);
+  auto status = arrow_builder->AppendValues(data.data(), n_values, valid_bytes);
   if (!garrow_error_check(error, status, context)) {
     return FALSE;
   }
@@ -1035,13 +1035,13 @@ garrow_boolean_array_builder_append_values(GArrowBooleanArrayBuilder *builder,
   gint64 is_valids_length,
   GError **error)
 {
-  guint8 arrow_values[values_length];
+  std::vector<guint8> arrow_values(values_length);
   for (gint64 i = 0; i < values_length; ++i) {
     arrow_values[i] = values[i];
   }
   return garrow_array_builder_append_values(
     GARROW_ARRAY_BUILDER(builder),
-    arrow_values,
+    arrow_values.data(),
     values_length,
     is_valids,
     is_valids_length,
diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h
index 8a1385b9b8c1b..6a0d0154833a7 100644
--- a/c_glib/arrow-glib/array-builder.h
+++ b/c_glib/arrow-glib/array-builder.h
@@ -26,6 +26,7 @@
 G_BEGIN_DECLS

 #define GARROW_TYPE_ARRAY_BUILDER (garrow_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowArrayBuilder, garrow_array_builder, GARROW, ARRAY_BUILDER, GObject)
 struct _GArrowArrayBuilderClass
@@ -33,11 +34,15 @@ struct _GArrowArrayBuilderClass
   GObjectClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDataType *
 garrow_array_builder_get_value_data_type(GArrowArrayBuilder *builder);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowType
 garrow_array_builder_get_value_type(GArrowArrayBuilder *builder);

+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_array_builder_finish(GArrowArrayBuilder *builder, GError **error);

@@ -86,6 +91,7 @@
garrow_array_builder_append_empty_values(GArrowArrayBuilder *builder, GError **error); #define GARROW_TYPE_NULL_ARRAY_BUILDER (garrow_null_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_13 G_DECLARE_DERIVABLE_TYPE(GArrowNullArrayBuilder, garrow_null_array_builder, GARROW, @@ -114,6 +120,7 @@ garrow_null_array_builder_append_nulls(GArrowNullArrayBuilder *builder, #endif #define GARROW_TYPE_BOOLEAN_ARRAY_BUILDER (garrow_boolean_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowBooleanArrayBuilder, garrow_boolean_array_builder, GARROW, @@ -124,10 +131,12 @@ struct _GArrowBooleanArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBooleanArrayBuilder * garrow_boolean_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_boolean_array_builder_append_value) gboolean garrow_boolean_array_builder_append(GArrowBooleanArrayBuilder *builder, @@ -139,6 +148,8 @@ gboolean garrow_boolean_array_builder_append_value(GArrowBooleanArrayBuilder *builder, gboolean value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_boolean_array_builder_append_values(GArrowBooleanArrayBuilder *builder, const gboolean *values, @@ -159,6 +170,7 @@ garrow_boolean_array_builder_append_nulls(GArrowBooleanArrayBuilder *builder, #endif #define GARROW_TYPE_INT_ARRAY_BUILDER (garrow_int_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowIntArrayBuilder, garrow_int_array_builder, GARROW, @@ -169,10 +181,12 @@ struct _GArrowIntArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowIntArrayBuilder * garrow_int_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_int_array_builder_append_value) gboolean garrow_int_array_builder_append(GArrowIntArrayBuilder *builder, @@ -184,6 +198,7 @@ gboolean garrow_int_array_builder_append_value(GArrowIntArrayBuilder *builder, gint64 value, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_int_array_builder_append_values(GArrowIntArrayBuilder *builder, const gint64 *values, @@ -192,9 +207,12 @@ garrow_int_array_builder_append_values(GArrowIntArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int_array_builder_append_null(GArrowIntArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int_array_builder_append_nulls(GArrowIntArrayBuilder *builder, @@ -203,6 +221,7 @@ garrow_int_array_builder_append_nulls(GArrowIntArrayBuilder *builder, #endif #define GARROW_TYPE_UINT_ARRAY_BUILDER (garrow_uint_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUIntArrayBuilder, garrow_uint_array_builder, GARROW, @@ -213,10 +232,12 @@ struct _GArrowUIntArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUIntArrayBuilder * garrow_uint_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint_array_builder_append_value) gboolean garrow_uint_array_builder_append(GArrowUIntArrayBuilder *builder, @@ -228,6 +249,8 @@ gboolean garrow_uint_array_builder_append_value(GArrowUIntArrayBuilder *builder, guint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean 
garrow_uint_array_builder_append_values(GArrowUIntArrayBuilder *builder, const guint64 *values, @@ -236,9 +259,12 @@ garrow_uint_array_builder_append_values(GArrowUIntArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint_array_builder_append_null(GArrowUIntArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint_array_builder_append_nulls(GArrowUIntArrayBuilder *builder, @@ -247,6 +273,7 @@ garrow_uint_array_builder_append_nulls(GArrowUIntArrayBuilder *builder, #endif #define GARROW_TYPE_INT8_ARRAY_BUILDER (garrow_int8_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt8ArrayBuilder, garrow_int8_array_builder, GARROW, @@ -257,10 +284,12 @@ struct _GArrowInt8ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt8ArrayBuilder * garrow_int8_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_int8_array_builder_append_value) gboolean garrow_int8_array_builder_append(GArrowInt8ArrayBuilder *builder, @@ -272,6 +301,8 @@ gboolean garrow_int8_array_builder_append_value(GArrowInt8ArrayBuilder *builder, gint8 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_int8_array_builder_append_values(GArrowInt8ArrayBuilder *builder, const gint8 *values, @@ -280,9 +311,12 @@ garrow_int8_array_builder_append_values(GArrowInt8ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int8_array_builder_append_null(GArrowInt8ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int8_array_builder_append_nulls(GArrowInt8ArrayBuilder *builder, @@ -291,6 +325,7 @@ garrow_int8_array_builder_append_nulls(GArrowInt8ArrayBuilder *builder, #endif #define GARROW_TYPE_UINT8_ARRAY_BUILDER (garrow_uint8_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt8ArrayBuilder, garrow_uint8_array_builder, GARROW, @@ -301,10 +336,12 @@ struct _GArrowUInt8ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt8ArrayBuilder * garrow_uint8_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint8_array_builder_append_value) gboolean garrow_uint8_array_builder_append(GArrowUInt8ArrayBuilder *builder, @@ -316,6 +353,8 @@ gboolean garrow_uint8_array_builder_append_value(GArrowUInt8ArrayBuilder *builder, guint8 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint8_array_builder_append_values(GArrowUInt8ArrayBuilder *builder, const guint8 *values, @@ -324,9 +363,12 @@ garrow_uint8_array_builder_append_values(GArrowUInt8ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint8_array_builder_append_null(GArrowUInt8ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean 
garrow_uint8_array_builder_append_nulls(GArrowUInt8ArrayBuilder *builder, @@ -335,6 +377,7 @@ garrow_uint8_array_builder_append_nulls(GArrowUInt8ArrayBuilder *builder, #endif #define GARROW_TYPE_INT16_ARRAY_BUILDER (garrow_int16_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt16ArrayBuilder, garrow_int16_array_builder, GARROW, @@ -345,6 +388,7 @@ struct _GArrowInt16ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt16ArrayBuilder * garrow_int16_array_builder_new(void); @@ -360,6 +404,7 @@ gboolean garrow_int16_array_builder_append_value(GArrowInt16ArrayBuilder *builder, gint16 value, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_int16_array_builder_append_values(GArrowInt16ArrayBuilder *builder, const gint16 *values, @@ -368,9 +413,12 @@ garrow_int16_array_builder_append_values(GArrowInt16ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int16_array_builder_append_null(GArrowInt16ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int16_array_builder_append_nulls(GArrowInt16ArrayBuilder *builder, @@ -379,6 +427,7 @@ garrow_int16_array_builder_append_nulls(GArrowInt16ArrayBuilder *builder, #endif #define GARROW_TYPE_UINT16_ARRAY_BUILDER (garrow_uint16_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt16ArrayBuilder, garrow_uint16_array_builder, GARROW, @@ -389,10 +438,12 @@ struct _GArrowUInt16ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt16ArrayBuilder * garrow_uint16_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint16_array_builder_append_value) gboolean garrow_uint16_array_builder_append(GArrowUInt16ArrayBuilder *builder, @@ -404,6 +455,8 @@ gboolean garrow_uint16_array_builder_append_value(GArrowUInt16ArrayBuilder *builder, guint16 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint16_array_builder_append_values(GArrowUInt16ArrayBuilder *builder, const guint16 *values, @@ -412,10 +465,13 @@ garrow_uint16_array_builder_append_values(GArrowUInt16ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint16_array_builder_append_null(GArrowUInt16ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint16_array_builder_append_nulls(GArrowUInt16ArrayBuilder *builder, @@ -424,6 +480,7 @@ garrow_uint16_array_builder_append_nulls(GArrowUInt16ArrayBuilder *builder, #endif #define GARROW_TYPE_INT32_ARRAY_BUILDER (garrow_int32_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt32ArrayBuilder, garrow_int32_array_builder, GARROW, @@ -434,10 +491,12 @@ struct _GArrowInt32ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt32ArrayBuilder * garrow_int32_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_int32_array_builder_append_value) gboolean garrow_int32_array_builder_append(GArrowInt32ArrayBuilder *builder, @@ 
-449,6 +508,8 @@ gboolean garrow_int32_array_builder_append_value(GArrowInt32ArrayBuilder *builder, gint32 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_int32_array_builder_append_values(GArrowInt32ArrayBuilder *builder, const gint32 *values, @@ -457,9 +518,12 @@ garrow_int32_array_builder_append_values(GArrowInt32ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int32_array_builder_append_null(GArrowInt32ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int32_array_builder_append_nulls(GArrowInt32ArrayBuilder *builder, @@ -468,6 +532,7 @@ garrow_int32_array_builder_append_nulls(GArrowInt32ArrayBuilder *builder, #endif #define GARROW_TYPE_UINT32_ARRAY_BUILDER (garrow_uint32_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt32ArrayBuilder, garrow_uint32_array_builder, GARROW, @@ -478,10 +543,12 @@ struct _GArrowUInt32ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt32ArrayBuilder * garrow_uint32_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint32_array_builder_append_value) gboolean garrow_uint32_array_builder_append(GArrowUInt32ArrayBuilder *builder, @@ -493,6 +560,8 @@ gboolean garrow_uint32_array_builder_append_value(GArrowUInt32ArrayBuilder *builder, guint32 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint32_array_builder_append_values(GArrowUInt32ArrayBuilder *builder, const guint32 *values, @@ -501,10 +570,13 @@ garrow_uint32_array_builder_append_values(GArrowUInt32ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint32_array_builder_append_null(GArrowUInt32ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint32_array_builder_append_nulls(GArrowUInt32ArrayBuilder *builder, @@ -513,6 +585,7 @@ garrow_uint32_array_builder_append_nulls(GArrowUInt32ArrayBuilder *builder, #endif #define GARROW_TYPE_INT64_ARRAY_BUILDER (garrow_int64_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt64ArrayBuilder, garrow_int64_array_builder, GARROW, @@ -523,10 +596,12 @@ struct _GArrowInt64ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt64ArrayBuilder * garrow_int64_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_int64_array_builder_append_value) gboolean garrow_int64_array_builder_append(GArrowInt64ArrayBuilder *builder, @@ -538,6 +613,8 @@ gboolean garrow_int64_array_builder_append_value(GArrowInt64ArrayBuilder *builder, gint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_int64_array_builder_append_values(GArrowInt64ArrayBuilder *builder, const gint64 *values, @@ -546,9 +623,12 @@ garrow_int64_array_builder_append_values(GArrowInt64ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean 
garrow_int64_array_builder_append_null(GArrowInt64ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int64_array_builder_append_nulls(GArrowInt64ArrayBuilder *builder, @@ -557,6 +637,7 @@ garrow_int64_array_builder_append_nulls(GArrowInt64ArrayBuilder *builder, #endif #define GARROW_TYPE_UINT64_ARRAY_BUILDER (garrow_uint64_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt64ArrayBuilder, garrow_uint64_array_builder, GARROW, @@ -567,10 +648,12 @@ struct _GArrowUInt64ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt64ArrayBuilder * garrow_uint64_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint64_array_builder_append_value) gboolean garrow_uint64_array_builder_append(GArrowUInt64ArrayBuilder *builder, @@ -582,6 +665,8 @@ gboolean garrow_uint64_array_builder_append_value(GArrowUInt64ArrayBuilder *builder, guint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint64_array_builder_append_values(GArrowUInt64ArrayBuilder *builder, const guint64 *values, @@ -590,10 +675,13 @@ garrow_uint64_array_builder_append_values(GArrowUInt64ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint64_array_builder_append_null(GArrowUInt64ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint64_array_builder_append_nulls(GArrowUInt64ArrayBuilder *builder, @@ -602,6 +690,7 @@ garrow_uint64_array_builder_append_nulls(GArrowUInt64ArrayBuilder *builder, #endif #define GARROW_TYPE_HALF_FLOAT_ARRAY_BUILDER (garrow_half_float_array_builder_get_type()) +GARROW_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatArrayBuilder, garrow_half_float_array_builder, GARROW, @@ -631,6 +720,7 @@ garrow_half_float_array_builder_append_values(GArrowHalfFloatArrayBuilder *build GError **error); #define GARROW_TYPE_FLOAT_ARRAY_BUILDER (garrow_float_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFloatArrayBuilder, garrow_float_array_builder, GARROW, @@ -641,10 +731,12 @@ struct _GArrowFloatArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowFloatArrayBuilder * garrow_float_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_float_array_builder_append_value) gboolean garrow_float_array_builder_append(GArrowFloatArrayBuilder *builder, @@ -656,6 +748,8 @@ gboolean garrow_float_array_builder_append_value(GArrowFloatArrayBuilder *builder, gfloat value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_float_array_builder_append_values(GArrowFloatArrayBuilder *builder, const gfloat *values, @@ -664,9 +758,12 @@ garrow_float_array_builder_append_values(GArrowFloatArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_float_array_builder_append_null(GArrowFloatArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean 
garrow_float_array_builder_append_nulls(GArrowFloatArrayBuilder *builder, @@ -675,6 +772,7 @@ garrow_float_array_builder_append_nulls(GArrowFloatArrayBuilder *builder, #endif #define GARROW_TYPE_DOUBLE_ARRAY_BUILDER (garrow_double_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDoubleArrayBuilder, garrow_double_array_builder, GARROW, @@ -685,10 +783,12 @@ struct _GArrowDoubleArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDoubleArrayBuilder * garrow_double_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_double_array_builder_append_value) gboolean garrow_double_array_builder_append(GArrowDoubleArrayBuilder *builder, @@ -700,6 +800,8 @@ gboolean garrow_double_array_builder_append_value(GArrowDoubleArrayBuilder *builder, gdouble value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_double_array_builder_append_values(GArrowDoubleArrayBuilder *builder, const gdouble *values, @@ -708,10 +810,13 @@ garrow_double_array_builder_append_values(GArrowDoubleArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_double_array_builder_append_null(GArrowDoubleArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_double_array_builder_append_nulls(GArrowDoubleArrayBuilder *builder, @@ -720,6 +825,7 @@ garrow_double_array_builder_append_nulls(GArrowDoubleArrayBuilder *builder, #endif #define GARROW_TYPE_BINARY_ARRAY_BUILDER (garrow_binary_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowBinaryArrayBuilder, garrow_binary_array_builder, GARROW, @@ -730,10 +836,12 @@ struct _GArrowBinaryArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBinaryArrayBuilder * garrow_binary_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_binary_array_builder_append_value) gboolean garrow_binary_array_builder_append(GArrowBinaryArrayBuilder *builder, @@ -761,10 +869,12 @@ garrow_binary_array_builder_append_values(GArrowBinaryArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_binary_array_builder_append_null(GArrowBinaryArrayBuilder *builder, GError **error); + GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) GARROW_AVAILABLE_IN_0_16 gboolean @@ -775,6 +885,7 @@ garrow_binary_array_builder_append_nulls(GArrowBinaryArrayBuilder *builder, #define GARROW_TYPE_LARGE_BINARY_ARRAY_BUILDER \ (garrow_large_binary_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryArrayBuilder, garrow_large_binary_array_builder, GARROW, @@ -821,6 +932,7 @@ garrow_large_binary_array_builder_append_nulls(GArrowLargeBinaryArrayBuilder *bu #endif #define GARROW_TYPE_STRING_ARRAY_BUILDER (garrow_string_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowStringArrayBuilder, garrow_string_array_builder, GARROW, @@ -831,10 +943,12 @@ struct _GArrowStringArrayBuilderClass GArrowBinaryArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowStringArrayBuilder * 
garrow_string_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_string_array_builder_append_value) gboolean garrow_string_array_builder_append(GArrowStringArrayBuilder *builder, @@ -863,6 +977,7 @@ garrow_string_array_builder_append_string_len(GArrowStringArrayBuilder *builder, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_16_FOR(garrow_string_array_builder_append_strings) gboolean garrow_string_array_builder_append_values(GArrowStringArrayBuilder *builder, @@ -883,6 +998,7 @@ garrow_string_array_builder_append_strings(GArrowStringArrayBuilder *builder, #define GARROW_TYPE_LARGE_STRING_ARRAY_BUILDER \ (garrow_large_string_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringArrayBuilder, garrow_large_string_array_builder, GARROW, @@ -919,6 +1035,7 @@ garrow_large_string_array_builder_append_strings(GArrowLargeStringArrayBuilder * #define GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER \ (garrow_fixed_size_binary_array_builder_get_type()) +GARROW_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryArrayBuilder, garrow_fixed_size_binary_array_builder, GARROW, @@ -963,6 +1080,7 @@ garrow_fixed_size_binary_array_builder_append_values_packed( GError **error); #define GARROW_TYPE_DATE32_ARRAY_BUILDER (garrow_date32_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDate32ArrayBuilder, garrow_date32_array_builder, GARROW, @@ -973,10 +1091,12 @@ struct _GArrowDate32ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDate32ArrayBuilder * garrow_date32_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_date32_array_builder_append_value) gboolean garrow_date32_array_builder_append(GArrowDate32ArrayBuilder *builder, @@ -988,6 +1108,8 @@ gboolean garrow_date32_array_builder_append_value(GArrowDate32ArrayBuilder *builder, gint32 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_date32_array_builder_append_values(GArrowDate32ArrayBuilder *builder, const gint32 *values, @@ -996,10 +1118,13 @@ garrow_date32_array_builder_append_values(GArrowDate32ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_date32_array_builder_append_null(GArrowDate32ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_date32_array_builder_append_nulls(GArrowDate32ArrayBuilder *builder, @@ -1008,6 +1133,7 @@ garrow_date32_array_builder_append_nulls(GArrowDate32ArrayBuilder *builder, #endif #define GARROW_TYPE_DATE64_ARRAY_BUILDER (garrow_date64_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDate64ArrayBuilder, garrow_date64_array_builder, GARROW, @@ -1018,10 +1144,12 @@ struct _GArrowDate64ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDate64ArrayBuilder * garrow_date64_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_date64_array_builder_append_value) gboolean garrow_date64_array_builder_append(GArrowDate64ArrayBuilder *builder, @@ -1033,6 +1161,8 @@ gboolean 
garrow_date64_array_builder_append_value(GArrowDate64ArrayBuilder *builder, gint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_date64_array_builder_append_values(GArrowDate64ArrayBuilder *builder, const gint64 *values, @@ -1041,10 +1171,13 @@ garrow_date64_array_builder_append_values(GArrowDate64ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_date64_array_builder_append_null(GArrowDate64ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_date64_array_builder_append_nulls(GArrowDate64ArrayBuilder *builder, @@ -1053,6 +1186,7 @@ garrow_date64_array_builder_append_nulls(GArrowDate64ArrayBuilder *builder, #endif #define GARROW_TYPE_TIMESTAMP_ARRAY_BUILDER (garrow_timestamp_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTimestampArrayBuilder, garrow_timestamp_array_builder, GARROW, @@ -1063,10 +1197,12 @@ struct _GArrowTimestampArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTimestampArrayBuilder * garrow_timestamp_array_builder_new(GArrowTimestampDataType *data_type); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_timestamp_array_builder_append_value) gboolean garrow_timestamp_array_builder_append(GArrowTimestampArrayBuilder *builder, @@ -1078,6 +1214,8 @@ gboolean garrow_timestamp_array_builder_append_value(GArrowTimestampArrayBuilder *builder, gint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_timestamp_array_builder_append_values(GArrowTimestampArrayBuilder *builder, const gint64 *values, @@ -1086,10 +1224,13 @@ garrow_timestamp_array_builder_append_values(GArrowTimestampArrayBuilder *builde gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_timestamp_array_builder_append_null(GArrowTimestampArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_timestamp_array_builder_append_nulls(GArrowTimestampArrayBuilder *builder, @@ -1098,6 +1239,7 @@ garrow_timestamp_array_builder_append_nulls(GArrowTimestampArrayBuilder *builder #endif #define GARROW_TYPE_TIME32_ARRAY_BUILDER (garrow_time32_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTime32ArrayBuilder, garrow_time32_array_builder, GARROW, @@ -1108,10 +1250,12 @@ struct _GArrowTime32ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTime32ArrayBuilder * garrow_time32_array_builder_new(GArrowTime32DataType *data_type); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_time32_array_builder_append_value) gboolean garrow_time32_array_builder_append(GArrowTime32ArrayBuilder *builder, @@ -1123,6 +1267,8 @@ gboolean garrow_time32_array_builder_append_value(GArrowTime32ArrayBuilder *builder, gint32 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_time32_array_builder_append_values(GArrowTime32ArrayBuilder *builder, const gint32 *values, @@ -1131,10 +1277,13 @@ garrow_time32_array_builder_append_values(GArrowTime32ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef 
GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_time32_array_builder_append_null(GArrowTime32ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_time32_array_builder_append_nulls(GArrowTime32ArrayBuilder *builder, @@ -1143,6 +1292,7 @@ garrow_time32_array_builder_append_nulls(GArrowTime32ArrayBuilder *builder, #endif #define GARROW_TYPE_TIME64_ARRAY_BUILDER (garrow_time64_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTime64ArrayBuilder, garrow_time64_array_builder, GARROW, @@ -1153,10 +1303,12 @@ struct _GArrowTime64ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTime64ArrayBuilder * garrow_time64_array_builder_new(GArrowTime64DataType *data_type); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_time64_array_builder_append_value) gboolean garrow_time64_array_builder_append(GArrowTime64ArrayBuilder *builder, @@ -1168,6 +1320,8 @@ gboolean garrow_time64_array_builder_append_value(GArrowTime64ArrayBuilder *builder, gint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_time64_array_builder_append_values(GArrowTime64ArrayBuilder *builder, const gint64 *values, @@ -1176,10 +1330,13 @@ garrow_time64_array_builder_append_values(GArrowTime64ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_time64_array_builder_append_null(GArrowTime64ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_time64_array_builder_append_nulls(GArrowTime64ArrayBuilder *builder, @@ -1189,6 +1346,7 @@ garrow_time64_array_builder_append_nulls(GArrowTime64ArrayBuilder *builder, #define GARROW_TYPE_MONTH_INTERVAL_ARRAY_BUILDER \ (garrow_month_interval_array_builder_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalArrayBuilder, garrow_month_interval_array_builder, GARROW, @@ -1220,6 +1378,7 @@ garrow_month_interval_array_builder_append_values( #define GARROW_TYPE_DAY_TIME_INTERVAL_ARRAY_BUILDER \ (garrow_day_time_interval_array_builder_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalArrayBuilder, garrow_day_time_interval_array_builder, GARROW, @@ -1252,6 +1411,7 @@ garrow_day_time_interval_array_builder_append_values( #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_ARRAY_BUILDER \ (garrow_month_day_nano_interval_array_builder_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalArrayBuilder, garrow_month_day_nano_interval_array_builder, GARROW, @@ -1284,6 +1444,7 @@ garrow_month_day_nano_interval_array_builder_append_values( #define GARROW_TYPE_BINARY_DICTIONARY_ARRAY_BUILDER \ (garrow_binary_dictionary_array_builder_get_type()) +GARROW_AVAILABLE_IN_2_0 G_DECLARE_DERIVABLE_TYPE(GArrowBinaryDictionaryArrayBuilder, garrow_binary_dictionary_array_builder, GARROW, @@ -1350,6 +1511,7 @@ garrow_binary_dictionary_array_builder_reset_full( #define GARROW_TYPE_STRING_DICTIONARY_ARRAY_BUILDER \ (garrow_string_dictionary_array_builder_get_type()) +GARROW_AVAILABLE_IN_2_0 G_DECLARE_DERIVABLE_TYPE(GArrowStringDictionaryArrayBuilder, garrow_string_dictionary_array_builder, GARROW, @@ 
-1408,6 +1570,7 @@ garrow_string_dictionary_array_builder_reset_full( GArrowStringDictionaryArrayBuilder *builder); #define GARROW_TYPE_LIST_ARRAY_BUILDER (garrow_list_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowListArrayBuilder, garrow_list_array_builder, GARROW, @@ -1418,10 +1581,12 @@ struct _GArrowListArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowListArrayBuilder * garrow_list_array_builder_new(GArrowListDataType *data_type, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_list_array_builder_append_value) gboolean garrow_list_array_builder_append(GArrowListArrayBuilder *builder, GError **error); @@ -1430,15 +1595,18 @@ GARROW_AVAILABLE_IN_0_12 gboolean garrow_list_array_builder_append_value(GArrowListArrayBuilder *builder, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_list_array_builder_append_null(GArrowListArrayBuilder *builder, GError **error); #endif +GARROW_AVAILABLE_IN_ALL GArrowArrayBuilder * garrow_list_array_builder_get_value_builder(GArrowListArrayBuilder *builder); #define GARROW_TYPE_LARGE_LIST_ARRAY_BUILDER (garrow_large_list_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE(GArrowLargeListArrayBuilder, garrow_large_list_array_builder, GARROW, @@ -1468,6 +1636,7 @@ GArrowArrayBuilder * garrow_large_list_array_builder_get_value_builder(GArrowLargeListArrayBuilder *builder); #define GARROW_TYPE_STRUCT_ARRAY_BUILDER (garrow_struct_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowStructArrayBuilder, garrow_struct_array_builder, GARROW, @@ -1478,10 +1647,12 @@ struct _GArrowStructArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowStructArrayBuilder * garrow_struct_array_builder_new(GArrowStructDataType *data_type, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_struct_array_builder_append_value) gboolean garrow_struct_array_builder_append(GArrowStructArrayBuilder *builder, GError **error); @@ -1491,6 +1662,7 @@ gboolean garrow_struct_array_builder_append_value(GArrowStructArrayBuilder *builder, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_struct_array_builder_append_null(GArrowStructArrayBuilder *builder, @@ -1498,15 +1670,19 @@ garrow_struct_array_builder_append_null(GArrowStructArrayBuilder *builder, #endif #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_12_0_FOR(garrow_array_builder_get_child) GArrowArrayBuilder * garrow_struct_array_builder_get_field_builder(GArrowStructArrayBuilder *builder, gint i); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_12_0_FOR(garrow_array_builder_get_children) GList * garrow_struct_array_builder_get_field_builders(GArrowStructArrayBuilder *builder); #endif #define GARROW_TYPE_MAP_ARRAY_BUILDER (garrow_map_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowMapArrayBuilder, garrow_map_array_builder, GARROW, @@ -1554,6 +1730,7 @@ GArrowArrayBuilder * garrow_map_array_builder_get_value_builder(GArrowMapArrayBuilder *builder); #define GARROW_TYPE_DECIMAL128_ARRAY_BUILDER (garrow_decimal128_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL 
G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128ArrayBuilder, garrow_decimal128_array_builder, GARROW, @@ -1564,10 +1741,12 @@ struct _GArrowDecimal128ArrayBuilderClass GArrowFixedSizeBinaryArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDecimal128ArrayBuilder * garrow_decimal128_array_builder_new(GArrowDecimal128DataType *data_type); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_decimal128_array_builder_append_value) gboolean garrow_decimal128_array_builder_append(GArrowDecimal128ArrayBuilder *builder, @@ -1596,6 +1775,7 @@ garrow_decimal128_array_builder_append_null(GArrowDecimal128ArrayBuilder *builde #endif #define GARROW_TYPE_DECIMAL256_ARRAY_BUILDER (garrow_decimal256_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256ArrayBuilder, garrow_decimal256_array_builder, GARROW, @@ -1606,6 +1786,7 @@ struct _GArrowDecimal256ArrayBuilderClass GArrowFixedSizeBinaryArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDecimal256ArrayBuilder * garrow_decimal256_array_builder_new(GArrowDecimal256DataType *data_type); @@ -1624,6 +1805,7 @@ garrow_decimal256_array_builder_append_values(GArrowDecimal256ArrayBuilder *buil GError **error); #define GARROW_TYPE_UNION_ARRAY_BUILDER (garrow_union_array_builder_get_type()) +GARROW_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GArrowUnionArrayBuilder, garrow_union_array_builder, GARROW, @@ -1648,6 +1830,7 @@ garrow_union_array_builder_append_value(GArrowUnionArrayBuilder *builder, #define GARROW_TYPE_DENSE_UNION_ARRAY_BUILDER \ (garrow_dense_union_array_builder_get_type()) +GARROW_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionArrayBuilder, garrow_dense_union_array_builder, GARROW, @@ -1664,6 +1847,7 @@ garrow_dense_union_array_builder_new(GArrowDenseUnionDataType *data_type, GError #define GARROW_TYPE_SPARSE_UNION_ARRAY_BUILDER \ (garrow_sparse_union_array_builder_get_type()) +GARROW_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArrayBuilder, garrow_sparse_union_array_builder, GARROW, diff --git a/c_glib/arrow-glib/basic-array-definition.h b/c_glib/arrow-glib/basic-array-definition.h index 54642dae018ec..2fa67c09c1cc4 100644 --- a/c_glib/arrow-glib/basic-array-definition.h +++ b/c_glib/arrow-glib/basic-array-definition.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GARROW_TYPE_ARRAY (garrow_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowArray, garrow_array, GARROW, ARRAY, GObject) struct _GArrowArrayClass { @@ -31,6 +34,7 @@ struct _GArrowArrayClass }; #define GARROW_TYPE_EXTENSION_ARRAY (garrow_extension_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowExtensionArray, garrow_extension_array, GARROW, EXTENSION_ARRAY, GArrowArray) struct _GArrowExtensionArrayClass diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h index ee6f40b1ddc24..95679aa37c57a 100644 --- a/c_glib/arrow-glib/basic-array.h +++ b/c_glib/arrow-glib/basic-array.h @@ -27,6 +27,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_EQUAL_OPTIONS (garrow_equal_options_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowEqualOptions, garrow_equal_options, GARROW, EQUAL_OPTIONS, GObject) struct _GArrowEqualOptionsClass @@ -52,6 +53,7 @@ garrow_array_export(GArrowArray *array, gpointer *c_abi_schema, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_array_equal(GArrowArray *array, GArrowArray *other_array); GARROW_AVAILABLE_IN_5_0 @@ -59,8 +61,11 @@ 
gboolean garrow_array_equal_options(GArrowArray *array, GArrowArray *other_array, GArrowEqualOptions *options); +GARROW_AVAILABLE_IN_ALL gboolean garrow_array_equal_approx(GArrowArray *array, GArrowArray *other_array); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_array_equal_range(GArrowArray *array, gint64 start_index, @@ -69,37 +74,60 @@ garrow_array_equal_range(GArrowArray *array, gint64 end_index, GArrowEqualOptions *options); +GARROW_AVAILABLE_IN_ALL gboolean garrow_array_is_null(GArrowArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_array_is_valid(GArrowArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_array_get_length(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_array_get_offset(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_array_get_n_nulls(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_array_get_null_bitmap(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_array_get_value_data_type(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowType garrow_array_get_value_type(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_array_slice(GArrowArray *array, gint64 offset, gint64 length); + +GARROW_AVAILABLE_IN_ALL gchar * garrow_array_to_string(GArrowArray *array, GError **error); + GARROW_AVAILABLE_IN_0_15 GArrowArray * garrow_array_view(GArrowArray *array, GArrowDataType *return_type, GError **error); + GARROW_AVAILABLE_IN_0_15 gchar * garrow_array_diff_unified(GArrowArray *array, GArrowArray *other_array); + GARROW_AVAILABLE_IN_4_0 GArrowArray * garrow_array_concatenate(GArrowArray *array, GList *other_arrays, GError **error); #define GARROW_TYPE_NULL_ARRAY (garrow_null_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowNullArray, garrow_null_array, GARROW, NULL_ARRAY, GArrowArray) struct _GArrowNullArrayClass @@ -107,10 +135,12 @@ struct _GArrowNullArrayClass GArrowArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowNullArray * garrow_null_array_new(gint64 length); #define GARROW_TYPE_PRIMITIVE_ARRAY (garrow_primitive_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowPrimitiveArray, garrow_primitive_array, GARROW, PRIMITIVE_ARRAY, GArrowArray) struct _GArrowPrimitiveArrayClass @@ -119,6 +149,7 @@ struct _GArrowPrimitiveArrayClass }; #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_1_0_FOR(garrow_primitive_array_get_data_buffer) GArrowBuffer * garrow_primitive_array_get_buffer(GArrowPrimitiveArray *array); @@ -128,6 +159,7 @@ GArrowBuffer * garrow_primitive_array_get_data_buffer(GArrowPrimitiveArray *array); #define GARROW_TYPE_BOOLEAN_ARRAY (garrow_boolean_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowBooleanArray, garrow_boolean_array, GARROW, BOOLEAN_ARRAY, GArrowPrimitiveArray) struct _GArrowBooleanArrayClass @@ -135,18 +167,23 @@ struct _GArrowBooleanArrayClass GArrowPrimitiveArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBooleanArray * garrow_boolean_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gboolean garrow_boolean_array_get_value(GArrowBooleanArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL gboolean * garrow_boolean_array_get_values(GArrowBooleanArray *array, gint64 *length); #define GARROW_TYPE_NUMERIC_ARRAY (garrow_numeric_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowNumericArray, garrow_numeric_array, GARROW, 
NUMERIC_ARRAY, GArrowPrimitiveArray) struct _GArrowNumericArrayClass @@ -155,6 +192,7 @@ struct _GArrowNumericArrayClass }; #define GARROW_TYPE_INT8_ARRAY (garrow_int8_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowInt8Array, garrow_int8_array, GARROW, INT8_ARRAY, GArrowNumericArray) struct _GArrowInt8ArrayClass @@ -162,18 +200,23 @@ struct _GArrowInt8ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt8Array * garrow_int8_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint8 garrow_int8_array_get_value(GArrowInt8Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint8 * garrow_int8_array_get_values(GArrowInt8Array *array, gint64 *length); #define GARROW_TYPE_UINT8_ARRAY (garrow_uint8_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUInt8Array, garrow_uint8_array, GARROW, UINT8_ARRAY, GArrowNumericArray) struct _GArrowUInt8ArrayClass @@ -181,18 +224,23 @@ struct _GArrowUInt8ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt8Array * garrow_uint8_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL guint8 garrow_uint8_array_get_value(GArrowUInt8Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const guint8 * garrow_uint8_array_get_values(GArrowUInt8Array *array, gint64 *length); #define GARROW_TYPE_INT16_ARRAY (garrow_int16_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowInt16Array, garrow_int16_array, GARROW, INT16_ARRAY, GArrowNumericArray) struct _GArrowInt16ArrayClass @@ -200,18 +248,23 @@ struct _GArrowInt16ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt16Array * garrow_int16_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint16 garrow_int16_array_get_value(GArrowInt16Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint16 * garrow_int16_array_get_values(GArrowInt16Array *array, gint64 *length); #define GARROW_TYPE_UINT16_ARRAY (garrow_uint16_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUInt16Array, garrow_uint16_array, GARROW, UINT16_ARRAY, GArrowNumericArray) struct _GArrowUInt16ArrayClass @@ -219,18 +272,23 @@ struct _GArrowUInt16ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt16Array * garrow_uint16_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL guint16 garrow_uint16_array_get_value(GArrowUInt16Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const guint16 * garrow_uint16_array_get_values(GArrowUInt16Array *array, gint64 *length); #define GARROW_TYPE_INT32_ARRAY (garrow_int32_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowInt32Array, garrow_int32_array, GARROW, INT32_ARRAY, GArrowNumericArray) struct _GArrowInt32ArrayClass @@ -238,18 +296,23 @@ struct _GArrowInt32ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt32Array * garrow_int32_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint32 garrow_int32_array_get_value(GArrowInt32Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint32 * garrow_int32_array_get_values(GArrowInt32Array *array, gint64 *length); #define GARROW_TYPE_UINT32_ARRAY 
(garrow_uint32_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUInt32Array, garrow_uint32_array, GARROW, UINT32_ARRAY, GArrowNumericArray) struct _GArrowUInt32ArrayClass @@ -257,18 +320,23 @@ struct _GArrowUInt32ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt32Array * garrow_uint32_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL guint32 garrow_uint32_array_get_value(GArrowUInt32Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const guint32 * garrow_uint32_array_get_values(GArrowUInt32Array *array, gint64 *length); #define GARROW_TYPE_INT64_ARRAY (garrow_int64_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowInt64Array, garrow_int64_array, GARROW, INT64_ARRAY, GArrowNumericArray) struct _GArrowInt64ArrayClass @@ -276,18 +344,23 @@ struct _GArrowInt64ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt64Array * garrow_int64_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint64 garrow_int64_array_get_value(GArrowInt64Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint64 * garrow_int64_array_get_values(GArrowInt64Array *array, gint64 *length); #define GARROW_TYPE_UINT64_ARRAY (garrow_uint64_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUInt64Array, garrow_uint64_array, GARROW, UINT64_ARRAY, GArrowNumericArray) struct _GArrowUInt64ArrayClass @@ -295,18 +368,23 @@ struct _GArrowUInt64ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt64Array * garrow_uint64_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL guint64 garrow_uint64_array_get_value(GArrowUInt64Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const guint64 * garrow_uint64_array_get_values(GArrowUInt64Array *array, gint64 *length); #define GARROW_TYPE_HALF_FLOAT_ARRAY (garrow_half_float_array_get_type()) +GARROW_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatArray, garrow_half_float_array, GARROW, @@ -332,6 +410,7 @@ const guint16 * garrow_half_float_array_get_values(GArrowHalfFloatArray *array, gint64 *length); #define GARROW_TYPE_FLOAT_ARRAY (garrow_float_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowFloatArray, garrow_float_array, GARROW, FLOAT_ARRAY, GArrowNumericArray) struct _GArrowFloatArrayClass @@ -339,18 +418,23 @@ struct _GArrowFloatArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowFloatArray * garrow_float_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gfloat garrow_float_array_get_value(GArrowFloatArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gfloat * garrow_float_array_get_values(GArrowFloatArray *array, gint64 *length); #define GARROW_TYPE_DOUBLE_ARRAY (garrow_double_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowDoubleArray, garrow_double_array, GARROW, DOUBLE_ARRAY, GArrowNumericArray) struct _GArrowDoubleArrayClass @@ -358,18 +442,23 @@ struct _GArrowDoubleArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDoubleArray * garrow_double_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gdouble garrow_double_array_get_value(GArrowDoubleArray *array, 
gint64 i); + +GARROW_AVAILABLE_IN_ALL const gdouble * garrow_double_array_get_values(GArrowDoubleArray *array, gint64 *length); #define GARROW_TYPE_BINARY_ARRAY (garrow_binary_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowBinaryArray, garrow_binary_array, GARROW, BINARY_ARRAY, GArrowArray) struct _GArrowBinaryArrayClass @@ -377,6 +466,7 @@ struct _GArrowBinaryArrayClass GArrowArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBinaryArray * garrow_binary_array_new(gint64 length, GArrowBuffer *value_offsets, @@ -384,9 +474,12 @@ garrow_binary_array_new(gint64 length, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL GBytes * garrow_binary_array_get_value(GArrowBinaryArray *array, gint64 i); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_1_0_FOR(garrow_binary_array_get_data_buffer) GArrowBuffer * garrow_binary_array_get_buffer(GArrowBinaryArray *array); @@ -394,10 +487,13 @@ garrow_binary_array_get_buffer(GArrowBinaryArray *array); GARROW_AVAILABLE_IN_1_0 GArrowBuffer * garrow_binary_array_get_data_buffer(GArrowBinaryArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_binary_array_get_offsets_buffer(GArrowBinaryArray *array); #define GARROW_TYPE_LARGE_BINARY_ARRAY (garrow_large_binary_array_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryArray, garrow_large_binary_array, GARROW, @@ -428,11 +524,13 @@ garrow_large_binary_array_get_buffer(GArrowLargeBinaryArray *array); GARROW_AVAILABLE_IN_1_0 GArrowBuffer * garrow_large_binary_array_get_data_buffer(GArrowLargeBinaryArray *array); + GARROW_AVAILABLE_IN_0_16 GArrowBuffer * garrow_large_binary_array_get_offsets_buffer(GArrowLargeBinaryArray *array); #define GARROW_TYPE_STRING_ARRAY (garrow_string_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowStringArray, garrow_string_array, GARROW, STRING_ARRAY, GArrowBinaryArray) struct _GArrowStringArrayClass @@ -440,6 +538,7 @@ struct _GArrowStringArrayClass GArrowBinaryArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowStringArray * garrow_string_array_new(gint64 length, GArrowBuffer *value_offsets, @@ -447,10 +546,12 @@ garrow_string_array_new(gint64 length, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gchar * garrow_string_array_get_string(GArrowStringArray *array, gint64 i); #define GARROW_TYPE_LARGE_STRING_ARRAY (garrow_large_string_array_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringArray, garrow_large_string_array, GARROW, @@ -474,6 +575,7 @@ gchar * garrow_large_string_array_get_string(GArrowLargeStringArray *array, gint64 i); #define GARROW_TYPE_DATE32_ARRAY (garrow_date32_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowDate32Array, garrow_date32_array, GARROW, DATE32_ARRAY, GArrowNumericArray) struct _GArrowDate32ArrayClass @@ -481,18 +583,23 @@ struct _GArrowDate32ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDate32Array * garrow_date32_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint32 garrow_date32_array_get_value(GArrowDate32Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint32 * garrow_date32_array_get_values(GArrowDate32Array *array, gint64 *length); #define GARROW_TYPE_DATE64_ARRAY (garrow_date64_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowDate64Array, garrow_date64_array, GARROW, DATE64_ARRAY, 
GArrowNumericArray) struct _GArrowDate64ArrayClass @@ -500,18 +607,23 @@ struct _GArrowDate64ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDate64Array * garrow_date64_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint64 garrow_date64_array_get_value(GArrowDate64Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint64 * garrow_date64_array_get_values(GArrowDate64Array *array, gint64 *length); #define GARROW_TYPE_TIMESTAMP_ARRAY (garrow_timestamp_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTimestampArray, garrow_timestamp_array, GARROW, @@ -522,6 +634,7 @@ struct _GArrowTimestampArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTimestampArray * garrow_timestamp_array_new(GArrowTimestampDataType *data_type, gint64 length, @@ -529,12 +642,16 @@ garrow_timestamp_array_new(GArrowTimestampDataType *data_type, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint64 garrow_timestamp_array_get_value(GArrowTimestampArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint64 * garrow_timestamp_array_get_values(GArrowTimestampArray *array, gint64 *length); #define GARROW_TYPE_TIME32_ARRAY (garrow_time32_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowTime32Array, garrow_time32_array, GARROW, TIME32_ARRAY, GArrowNumericArray) struct _GArrowTime32ArrayClass @@ -542,6 +659,7 @@ struct _GArrowTime32ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTime32Array * garrow_time32_array_new(GArrowTime32DataType *data_type, gint64 length, @@ -549,12 +667,16 @@ garrow_time32_array_new(GArrowTime32DataType *data_type, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint32 garrow_time32_array_get_value(GArrowTime32Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint32 * garrow_time32_array_get_values(GArrowTime32Array *array, gint64 *length); #define GARROW_TYPE_TIME64_ARRAY (garrow_time64_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowTime64Array, garrow_time64_array, GARROW, TIME64_ARRAY, GArrowNumericArray) struct _GArrowTime64ArrayClass @@ -562,6 +684,7 @@ struct _GArrowTime64ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTime64Array * garrow_time64_array_new(GArrowTime64DataType *data_type, gint64 length, @@ -569,12 +692,16 @@ garrow_time64_array_new(GArrowTime64DataType *data_type, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint64 garrow_time64_array_get_value(GArrowTime64Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint64 * garrow_time64_array_get_values(GArrowTime64Array *array, gint64 *length); #define GARROW_TYPE_MONTH_INTERVAL_ARRAY (garrow_month_interval_array_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalArray, garrow_month_interval_array, GARROW, @@ -594,11 +721,13 @@ garrow_month_interval_array_new(gint64 length, GARROW_AVAILABLE_IN_8_0 gint32 garrow_month_interval_array_get_value(GArrowMonthIntervalArray *array, gint64 i); + GARROW_AVAILABLE_IN_8_0 const gint32 * garrow_month_interval_array_get_values(GArrowMonthIntervalArray *array, gint64 *length); #define GARROW_TYPE_DAY_TIME_INTERVAL_ARRAY (garrow_day_time_interval_array_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalArray, garrow_day_time_interval_array, GARROW, @@ -618,12 +747,14 @@ 
garrow_day_time_interval_array_new(gint64 length, GARROW_AVAILABLE_IN_8_0 GArrowDayMillisecond * garrow_day_time_interval_array_get_value(GArrowDayTimeIntervalArray *array, gint64 i); + GARROW_AVAILABLE_IN_8_0 GList * garrow_day_time_interval_array_get_values(GArrowDayTimeIntervalArray *array); #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_ARRAY \ (garrow_month_day_nano_interval_array_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalArray, garrow_month_day_nano_interval_array, GARROW, @@ -649,6 +780,7 @@ GList * garrow_month_day_nano_interval_array_get_values(GArrowMonthDayNanoIntervalArray *array); #define GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY (garrow_fixed_size_binary_array_get_type()) +GARROW_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryArray, garrow_fixed_size_binary_array, GARROW, @@ -669,14 +801,17 @@ garrow_fixed_size_binary_array_new(GArrowFixedSizeBinaryDataType *data_type, GARROW_AVAILABLE_IN_3_0 gint32 garrow_fixed_size_binary_array_get_byte_width(GArrowFixedSizeBinaryArray *array); + GARROW_AVAILABLE_IN_3_0 GBytes * garrow_fixed_size_binary_array_get_value(GArrowFixedSizeBinaryArray *array, gint64 i); + GARROW_AVAILABLE_IN_3_0 GBytes * garrow_fixed_size_binary_array_get_values_bytes(GArrowFixedSizeBinaryArray *array); #define GARROW_TYPE_DECIMAL128_ARRAY (garrow_decimal128_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Array, garrow_decimal128_array, GARROW, @@ -687,12 +822,16 @@ struct _GArrowDecimal128ArrayClass GArrowFixedSizeBinaryArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gchar * garrow_decimal128_array_format_value(GArrowDecimal128Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL GArrowDecimal128 * garrow_decimal128_array_get_value(GArrowDecimal128Array *array, gint64 i); #define GARROW_TYPE_DECIMAL256_ARRAY (garrow_decimal256_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256Array, garrow_decimal256_array, GARROW, @@ -703,8 +842,11 @@ struct _GArrowDecimal256ArrayClass GArrowFixedSizeBinaryArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gchar * garrow_decimal256_array_format_value(GArrowDecimal256Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL GArrowDecimal256 * garrow_decimal256_array_get_value(GArrowDecimal256Array *array, gint64 i); diff --git a/c_glib/arrow-glib/basic-array.hpp b/c_glib/arrow-glib/basic-array.hpp index f010cf3db4bc3..b2a7ed6ae075f 100644 --- a/c_glib/arrow-glib/basic-array.hpp +++ b/c_glib/arrow-glib/basic-array.hpp @@ -23,22 +23,32 @@ #include +GARROW_EXTERN arrow::EqualOptions * garrow_equal_options_get_raw(GArrowEqualOptions *equal_options); +GARROW_EXTERN GArrowArray * garrow_array_new_raw(std::shared_ptr *arrow_array); + +GARROW_EXTERN GArrowArray * garrow_array_new_raw(std::shared_ptr *arrow_array, const gchar *first_property_name, ...); + +GARROW_EXTERN GArrowArray * garrow_array_new_raw_valist(std::shared_ptr *arrow_array, const gchar *first_property_name, va_list args); + +GARROW_EXTERN GArrowExtensionArray * garrow_extension_array_new_raw(std::shared_ptr *arrow_array, GArrowArray *storage); + +GARROW_EXTERN std::shared_ptr garrow_array_get_raw(GArrowArray *array); diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index 36646a9733cd3..d1c06000065dc 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -1801,6 +1801,8 @@ garrow_extension_data_type_wrap_chunked_array(GArrowExtensionDataType *data_type return 
garrow_chunked_array_new_raw(&arrow_extension_chunked_array); } +G_END_DECLS + static std::shared_ptr garrow_extension_data_type_get_storage_data_type_raw(GArrowExtensionDataType *data_type) { @@ -1808,8 +1810,6 @@ garrow_extension_data_type_get_storage_data_type_raw(GArrowExtensionDataType *da return garrow_data_type_get_raw(priv->storage_data_type); } -G_END_DECLS - namespace garrow { GExtensionType::GExtensionType(GArrowExtensionDataType *garrow_data_type) : arrow::ExtensionType( diff --git a/c_glib/arrow-glib/basic-data-type.h b/c_glib/arrow-glib/basic-data-type.h index 01c9e5ef6e40a..77180018c9be8 100644 --- a/c_glib/arrow-glib/basic-data-type.h +++ b/c_glib/arrow-glib/basic-data-type.h @@ -28,6 +28,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_DATA_TYPE (garrow_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDataType, garrow_data_type, GARROW, DATA_TYPE, GObject) struct _GArrowDataTypeClass { @@ -42,17 +43,24 @@ GARROW_AVAILABLE_IN_6_0 gpointer garrow_data_type_export(GArrowDataType *data_type, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_data_type_equal(GArrowDataType *data_type, GArrowDataType *other_data_type); + +GARROW_AVAILABLE_IN_ALL gchar * garrow_data_type_to_string(GArrowDataType *data_type); + +GARROW_AVAILABLE_IN_ALL GArrowType garrow_data_type_get_id(GArrowDataType *data_type); + GARROW_AVAILABLE_IN_3_0 gchar * garrow_data_type_get_name(GArrowDataType *data_type); #define GARROW_TYPE_FIXED_WIDTH_DATA_TYPE (garrow_fixed_width_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFixedWidthDataType, garrow_fixed_width_data_type, GARROW, @@ -63,6 +71,7 @@ struct _GArrowFixedWidthDataTypeClass GArrowDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gint garrow_fixed_width_data_type_get_bit_width(GArrowFixedWidthDataType *data_type); /* TODO: @@ -71,6 +80,7 @@ GList *garrow_fixed_width_data_type_get_buffer_layout(GArrowFixedWidthDataType */ #define GARROW_TYPE_NULL_DATA_TYPE (garrow_null_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowNullDataType, garrow_null_data_type, GARROW, NULL_DATA_TYPE, GArrowDataType) struct _GArrowNullDataTypeClass @@ -78,10 +88,12 @@ struct _GArrowNullDataTypeClass GArrowDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowNullDataType * garrow_null_data_type_new(void); #define GARROW_TYPE_BOOLEAN_DATA_TYPE (garrow_boolean_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowBooleanDataType, garrow_boolean_data_type, GARROW, @@ -92,10 +104,12 @@ struct _GArrowBooleanDataTypeClass GArrowFixedWidthDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBooleanDataType * garrow_boolean_data_type_new(void); #define GARROW_TYPE_NUMERIC_DATA_TYPE (garrow_numeric_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowNumericDataType, garrow_numeric_data_type, GARROW, @@ -107,6 +121,7 @@ struct _GArrowNumericDataTypeClass }; #define GARROW_TYPE_INTEGER_DATA_TYPE (garrow_integer_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowIntegerDataType, garrow_integer_data_type, GARROW, @@ -122,6 +137,7 @@ gboolean garrow_integer_data_type_is_signed(GArrowIntegerDataType *data_type); #define GARROW_TYPE_INT8_DATA_TYPE (garrow_int8_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt8DataType, garrow_int8_data_type, GARROW, @@ -132,10 +148,12 @@ struct _GArrowInt8DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL 
GArrowInt8DataType * garrow_int8_data_type_new(void); #define GARROW_TYPE_UINT8_DATA_TYPE (garrow_uint8_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt8DataType, garrow_uint8_data_type, GARROW, @@ -146,10 +164,12 @@ struct _GArrowUInt8DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt8DataType * garrow_uint8_data_type_new(void); #define GARROW_TYPE_INT16_DATA_TYPE (garrow_int16_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt16DataType, garrow_int16_data_type, GARROW, @@ -160,10 +180,12 @@ struct _GArrowInt16DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt16DataType * garrow_int16_data_type_new(void); #define GARROW_TYPE_UINT16_DATA_TYPE (garrow_uint16_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt16DataType, garrow_uint16_data_type, GARROW, @@ -174,10 +196,12 @@ struct _GArrowUInt16DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt16DataType * garrow_uint16_data_type_new(void); #define GARROW_TYPE_INT32_DATA_TYPE (garrow_int32_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt32DataType, garrow_int32_data_type, GARROW, @@ -188,10 +212,12 @@ struct _GArrowInt32DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt32DataType * garrow_int32_data_type_new(void); #define GARROW_TYPE_UINT32_DATA_TYPE (garrow_uint32_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt32DataType, garrow_uint32_data_type, GARROW, @@ -202,10 +228,12 @@ struct _GArrowUInt32DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt32DataType * garrow_uint32_data_type_new(void); #define GARROW_TYPE_INT64_DATA_TYPE (garrow_int64_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt64DataType, garrow_int64_data_type, GARROW, @@ -216,10 +244,12 @@ struct _GArrowInt64DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt64DataType * garrow_int64_data_type_new(void); #define GARROW_TYPE_UINT64_DATA_TYPE (garrow_uint64_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt64DataType, garrow_uint64_data_type, GARROW, @@ -230,10 +260,12 @@ struct _GArrowUInt64DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt64DataType * garrow_uint64_data_type_new(void); #define GARROW_TYPE_FLOATING_POINT_DATA_TYPE (garrow_floating_point_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFloatingPointDataType, garrow_floating_point_data_type, GARROW, @@ -245,6 +277,7 @@ struct _GArrowFloatingPointDataTypeClass }; #define GARROW_TYPE_HALF_FLOAT_DATA_TYPE (garrow_half_float_data_type_get_type()) +GARROW_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatDataType, garrow_half_float_data_type, GARROW, @@ -260,6 +293,7 @@ GArrowHalfFloatDataType * garrow_half_float_data_type_new(void); #define GARROW_TYPE_FLOAT_DATA_TYPE (garrow_float_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFloatDataType, garrow_float_data_type, GARROW, @@ -270,10 +304,12 @@ struct _GArrowFloatDataTypeClass GArrowFloatingPointDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowFloatDataType * garrow_float_data_type_new(void); #define GARROW_TYPE_DOUBLE_DATA_TYPE (garrow_double_data_type_get_type()) 
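
The pattern these basic-data-type.h hunks repeat is the same one applied across the whole patch: every public declaration gains a GLib-style availability annotation, GARROW_AVAILABLE_IN_ALL for API present since the first release and GARROW_AVAILABLE_IN_<major>_<minor> for later additions. A minimal sketch of how such macros are conventionally defined follows; the macro bodies are an illustrative assumption (arrow-glib generates the real definitions into its version header), and only the macro names are taken from the diff itself.

/* Sketch only: conventional GLib-style availability macros.
 * These bodies are assumed for illustration; arrow-glib generates
 * the real ones into its version header. */

/* Export the symbol from the shared library (import on the
 * consumer side when building against a Windows DLL). */
#ifdef _WIN32
#  ifdef GARROW_COMPILATION
#    define GARROW_EXTERN __declspec(dllexport) extern
#  else
#    define GARROW_EXTERN __declspec(dllimport) extern
#  endif
#else
#  define GARROW_EXTERN extern
#endif

/* Present since the first release: exporting is all that is needed. */
#define GARROW_AVAILABLE_IN_ALL GARROW_EXTERN

/* Added in 8.0: additionally warn when the caller pins the allowed
 * API surface below 8.0 (G_UNAVAILABLE is GLib's stock macro). */
#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_8_0
#  define GARROW_AVAILABLE_IN_8_0 GARROW_EXTERN G_UNAVAILABLE(8, 0)
#else
#  define GARROW_AVAILABLE_IN_8_0 GARROW_EXTERN
#endif

Stacking composes the same way, which is why the deprecated entry points in the earlier hunks carry both a GARROW_AVAILABLE_IN_ALL line and a GARROW_DEPRECATED_IN_*_FOR() line.
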
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDoubleDataType,
                          garrow_double_data_type,
                          GARROW,
@@ -284,10 +320,12 @@ struct _GArrowDoubleDataTypeClass
   GArrowFloatingPointDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDoubleDataType *
 garrow_double_data_type_new(void);

 #define GARROW_TYPE_BINARY_DATA_TYPE (garrow_binary_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowBinaryDataType, garrow_binary_data_type, GARROW, BINARY_DATA_TYPE, GArrowDataType)
 struct _GArrowBinaryDataTypeClass
@@ -295,11 +333,13 @@ struct _GArrowBinaryDataTypeClass
   GArrowDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowBinaryDataType *
 garrow_binary_data_type_new(void);

 #define GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE \
   (garrow_fixed_size_binary_data_type_get_type())
+GARROW_AVAILABLE_IN_0_12
 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryDataType,
                          garrow_fixed_size_binary_data_type,
                          GARROW,
@@ -319,6 +359,7 @@ garrow_fixed_size_binary_data_type_get_byte_width(
   GArrowFixedSizeBinaryDataType *data_type);

 #define GARROW_TYPE_LARGE_BINARY_DATA_TYPE (garrow_large_binary_data_type_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryDataType,
                          garrow_large_binary_data_type,
                          GARROW,
@@ -334,6 +375,7 @@ GArrowLargeBinaryDataType *
 garrow_large_binary_data_type_new(void);

 #define GARROW_TYPE_STRING_DATA_TYPE (garrow_string_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowStringDataType,
                          garrow_string_data_type,
                          GARROW,
@@ -344,10 +386,12 @@ struct _GArrowStringDataTypeClass
   GArrowBinaryDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowStringDataType *
 garrow_string_data_type_new(void);

 #define GARROW_TYPE_LARGE_STRING_DATA_TYPE (garrow_large_string_data_type_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringDataType,
                          garrow_large_string_data_type,
                          GARROW,
@@ -363,6 +407,7 @@ GArrowLargeStringDataType *
 garrow_large_string_data_type_new(void);

 #define GARROW_TYPE_TEMPORAL_DATA_TYPE (garrow_temporal_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTemporalDataType,
                          garrow_temporal_data_type,
                          GARROW,
@@ -374,6 +419,7 @@ struct _GArrowTemporalDataTypeClass
 };

 #define GARROW_TYPE_DATE32_DATA_TYPE (garrow_date32_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDate32DataType,
                          garrow_date32_data_type,
                          GARROW,
@@ -384,10 +430,12 @@ struct _GArrowDate32DataTypeClass
   GArrowTemporalDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDate32DataType *
 garrow_date32_data_type_new(void);

 #define GARROW_TYPE_DATE64_DATA_TYPE (garrow_date64_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDate64DataType,
                          garrow_date64_data_type,
                          GARROW,
@@ -398,10 +446,12 @@ struct _GArrowDate64DataTypeClass
   GArrowTemporalDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDate64DataType *
 garrow_date64_data_type_new(void);

 #define GARROW_TYPE_TIMESTAMP_DATA_TYPE (garrow_timestamp_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTimestampDataType,
                          garrow_timestamp_data_type,
                          GARROW,
@@ -412,12 +462,16 @@ struct _GArrowTimestampDataTypeClass
   GArrowTemporalDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowTimestampDataType *
 garrow_timestamp_data_type_new(GArrowTimeUnit unit, GTimeZone *time_zone);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTimeUnit
 garrow_timestamp_data_type_get_unit(GArrowTimestampDataType *data_type);

 #define GARROW_TYPE_TIME_DATA_TYPE (garrow_time_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTimeDataType,
                          garrow_time_data_type,
                          GARROW,
@@ -428,10 +482,12 @@ struct _GArrowTimeDataTypeClass
   GArrowTemporalDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowTimeUnit
 garrow_time_data_type_get_unit(GArrowTimeDataType *time_data_type);

 #define GARROW_TYPE_TIME32_DATA_TYPE (garrow_time32_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTime32DataType,
                          garrow_time32_data_type,
                          GARROW,
@@ -442,10 +498,12 @@ struct _GArrowTime32DataTypeClass
   GArrowTimeDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowTime32DataType *
 garrow_time32_data_type_new(GArrowTimeUnit unit, GError **error);

 #define GARROW_TYPE_TIME64_DATA_TYPE (garrow_time64_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTime64DataType,
                          garrow_time64_data_type,
                          GARROW,
@@ -456,10 +514,12 @@ struct _GArrowTime64DataTypeClass
   GArrowTimeDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowTime64DataType *
 garrow_time64_data_type_new(GArrowTimeUnit unit, GError **error);

 #define GARROW_TYPE_INTERVAL_DATA_TYPE (garrow_interval_data_type_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowIntervalDataType,
                          garrow_interval_data_type,
                          GARROW,
@@ -475,6 +535,7 @@ GArrowIntervalType
 garrow_interval_data_type_get_interval_type(GArrowIntervalDataType *type);

 #define GARROW_TYPE_MONTH_INTERVAL_DATA_TYPE (garrow_month_interval_data_type_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalDataType,
                          garrow_month_interval_data_type,
                          GARROW,
@@ -491,6 +552,7 @@ garrow_month_interval_data_type_new(void);

 #define GARROW_TYPE_DAY_TIME_INTERVAL_DATA_TYPE \
   (garrow_day_time_interval_data_type_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalDataType,
                          garrow_day_time_interval_data_type,
                          GARROW,
@@ -507,6 +569,7 @@ garrow_day_time_interval_data_type_new(void);

 #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_DATA_TYPE \
   (garrow_month_day_nano_interval_data_type_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalDataType,
                          garrow_month_day_nano_interval_data_type,
                          GARROW,
@@ -522,6 +585,7 @@ GArrowMonthDayNanoIntervalDataType *
 garrow_month_day_nano_interval_data_type_new(void);

 #define GARROW_TYPE_DECIMAL_DATA_TYPE (garrow_decimal_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimalDataType,
                          garrow_decimal_data_type,
                          GARROW,
@@ -532,14 +596,20 @@ struct _GArrowDecimalDataTypeClass
   GArrowFixedSizeBinaryDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDecimalDataType *
 garrow_decimal_data_type_new(gint32 precision, gint32 scale, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gint32
 garrow_decimal_data_type_get_precision(GArrowDecimalDataType *decimal_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 gint32
 garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type);

 #define GARROW_TYPE_DECIMAL128_DATA_TYPE (garrow_decimal128_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128DataType,
                          garrow_decimal128_data_type,
                          GARROW,
@@ -559,6 +629,7 @@ GArrowDecimal128DataType *
 garrow_decimal128_data_type_new(gint32 precision, gint32 scale, GError **error);

 #define GARROW_TYPE_DECIMAL256_DATA_TYPE (garrow_decimal256_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256DataType,
                          garrow_decimal256_data_type,
                          GARROW,
@@ -578,6 +649,7 @@ GArrowDecimal256DataType *
 garrow_decimal256_data_type_new(gint32 precision, gint32 scale, GError **error);

 #define GARROW_TYPE_EXTENSION_DATA_TYPE (garrow_extension_data_type_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowExtensionDataType,
                          garrow_extension_data_type,
                          GARROW,
@@ -628,6 +700,7 @@ garrow_extension_data_type_wrap_chunked_array(GArrowExtensionDataType *data_type

 #define GARROW_TYPE_EXTENSION_DATA_TYPE_REGISTRY \
   (garrow_extension_data_type_registry_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowExtensionDataTypeRegistry,
                          garrow_extension_data_type_registry,
                          GARROW,
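Editor's note: the pattern in the hunks above is uniform — every public declaration gains a GARROW_AVAILABLE_IN_{ALL,x_y} annotation. For orientation, here is a hedged sketch of what such macros conventionally expand to; arrow-glib follows the GLib versioning-macro convention, so the generated arrow-glib/version.h should look roughly like this, but the exact names and version checks below are assumptions, not copied from this PR.

/* Sketch only: conventional definitions behind GARROW_AVAILABLE_IN_*.
 * GARROW_EXTERN handles symbol export/import; the meson.build hunk later
 * in this diff defines GARROW_COMPILATION while building the library. */
#if defined(_WIN32) && !defined(GARROW_STATIC_COMPILATION)
#  ifdef GARROW_COMPILATION
#    define GARROW_EXTERN __declspec(dllexport) extern
#  else
#    define GARROW_EXTERN __declspec(dllimport) extern
#  endif
#else
#  define GARROW_EXTERN __attribute__((visibility("default"))) extern
#endif

/* Unversioned API: always visible. */
#define GARROW_AVAILABLE_IN_ALL GARROW_EXTERN

/* Versioned API: may warn when the caller opts into an older API ceiling. */
#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_8_0
#  define GARROW_AVAILABLE_IN_8_0 GARROW_UNAVAILABLE(8, 0) GARROW_EXTERN
#else
#  define GARROW_AVAILABLE_IN_8_0 GARROW_EXTERN
#endif

With definitions of this shape, annotating a declaration costs nothing at the call site today, but gives the project a single switch (hidden default visibility plus per-version gating) over its exported ABI.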
diff --git a/c_glib/arrow-glib/buffer.h b/c_glib/arrow-glib/buffer.h
index 8f93a5ef0ddb2..29308e935aba2 100644
--- a/c_glib/arrow-glib/buffer.h
+++ b/c_glib/arrow-glib/buffer.h
@@ -21,44 +21,70 @@

 #include

+#include <arrow-glib/version.h>
+
 G_BEGIN_DECLS

 #define GARROW_TYPE_BUFFER (garrow_buffer_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowBuffer, garrow_buffer, GARROW, BUFFER, GObject)
 struct _GArrowBufferClass
 {
   GObjectClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_new(const guint8 *data, gint64 size);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_new_bytes(GBytes *data);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_buffer_equal(GArrowBuffer *buffer, GArrowBuffer *other_buffer);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_buffer_equal_n_bytes(GArrowBuffer *buffer,
                             GArrowBuffer *other_buffer,
                             gint64 n_bytes);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_buffer_is_mutable(GArrowBuffer *buffer);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_buffer_get_capacity(GArrowBuffer *buffer);
+
+GARROW_AVAILABLE_IN_ALL
 GBytes *
 garrow_buffer_get_data(GArrowBuffer *buffer);
+
+GARROW_AVAILABLE_IN_ALL
 GBytes *
 garrow_buffer_get_mutable_data(GArrowBuffer *buffer);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_buffer_get_size(GArrowBuffer *buffer);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_get_parent(GArrowBuffer *buffer);

+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_copy(GArrowBuffer *buffer, gint64 start, gint64 size, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_slice(GArrowBuffer *buffer, gint64 offset, gint64 size);

 #define GARROW_TYPE_MUTABLE_BUFFER (garrow_mutable_buffer_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowMutableBuffer, garrow_mutable_buffer, GARROW, MUTABLE_BUFFER, GArrowBuffer)
 struct _GArrowMutableBufferClass
@@ -66,12 +92,19 @@ struct _GArrowMutableBufferClass
   GArrowBufferClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowMutableBuffer *
 garrow_mutable_buffer_new(guint8 *data, gint64 size);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowMutableBuffer *
 garrow_mutable_buffer_new_bytes(GBytes *data);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowMutableBuffer *
 garrow_mutable_buffer_slice(GArrowMutableBuffer *buffer, gint64 offset, gint64 size);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_mutable_buffer_set_data(GArrowMutableBuffer *buffer,
                                gint64 offset,
@@ -80,6 +113,7 @@ garrow_mutable_buffer_set_data(GArrowMutableBuffer *buffer,
                                GError **error);

 #define GARROW_TYPE_RESIZABLE_BUFFER (garrow_resizable_buffer_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowResizableBuffer,
                          garrow_resizable_buffer,
                          GARROW,
@@ -90,12 +124,17 @@ struct _GArrowResizableBufferClass
   GArrowMutableBufferClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowResizableBuffer *
 garrow_resizable_buffer_new(gint64 initial_size, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_resizable_buffer_resize(GArrowResizableBuffer *buffer,
                                gint64 new_size,
                                GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_resizable_buffer_reserve(GArrowResizableBuffer *buffer,
                                 gint64 new_capacity,
diff --git a/c_glib/arrow-glib/buffer.hpp b/c_glib/arrow-glib/buffer.hpp
index 5d922371c3b6b..7e4d7ecee1c1c 100644
--- a/c_glib/arrow-glib/buffer.hpp
+++ b/c_glib/arrow-glib/buffer.hpp
@@ -23,20 +23,32 @@

 #include

+GARROW_EXTERN
 GArrowBuffer *
 garrow_buffer_new_raw(std::shared_ptr<arrow::Buffer> *arrow_buffer);
+
+GARROW_EXTERN
 GArrowBuffer *
 garrow_buffer_new_raw_bytes(std::shared_ptr<arrow::Buffer> *arrow_buffer, GBytes *data);
+
+GARROW_EXTERN
 GArrowBuffer *
 garrow_buffer_new_raw_parent(std::shared_ptr<arrow::Buffer> *arrow_buffer,
                              GArrowBuffer *parent);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::Buffer>
 garrow_buffer_get_raw(GArrowBuffer *buffer);

+GARROW_EXTERN
 GArrowMutableBuffer *
 garrow_mutable_buffer_new_raw(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer);
+
+GARROW_EXTERN
 GArrowMutableBuffer *
 garrow_mutable_buffer_new_raw_bytes(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer,
                                     GBytes *data);
+
+GARROW_EXTERN
 GArrowResizableBuffer *
 garrow_resizable_buffer_new_raw(std::shared_ptr<arrow::ResizableBuffer> *arrow_buffer);
diff --git a/c_glib/arrow-glib/chunked-array-definition.h b/c_glib/arrow-glib/chunked-array-definition.h
index b687735419eeb..744f1077ea754 100644
--- a/c_glib/arrow-glib/chunked-array-definition.h
+++ b/c_glib/arrow-glib/chunked-array-definition.h
@@ -24,6 +24,7 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_CHUNKED_ARRAY (garrow_chunked_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowChunkedArray, garrow_chunked_array, GARROW, CHUNKED_ARRAY, GObject)
 struct _GArrowChunkedArrayClass
diff --git a/c_glib/arrow-glib/chunked-array.h b/c_glib/arrow-glib/chunked-array.h
index 6ca497942ff2e..712d16504f624 100644
--- a/c_glib/arrow-glib/chunked-array.h
+++ b/c_glib/arrow-glib/chunked-array.h
@@ -24,42 +24,61 @@ G_BEGIN_DECLS

+GARROW_AVAILABLE_IN_ALL
 GArrowChunkedArray *
 garrow_chunked_array_new(GList *chunks, GError **error);
+
 GARROW_AVAILABLE_IN_11_0
 GArrowChunkedArray *
 garrow_chunked_array_new_empty(GArrowDataType *data_type, GError **error);

+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_chunked_array_equal(GArrowChunkedArray *chunked_array,
                            GArrowChunkedArray *other_chunked_array);

+GARROW_AVAILABLE_IN_ALL
 GArrowDataType *
 garrow_chunked_array_get_value_data_type(GArrowChunkedArray *chunked_array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowType
 garrow_chunked_array_get_value_type(GArrowChunkedArray *chunked_array);

 GARROW_DEPRECATED_IN_0_15_FOR(garrow_chunked_array_get_n_rows)
 guint64
 garrow_chunked_array_get_length(GArrowChunkedArray *chunked_array);
+
 GARROW_AVAILABLE_IN_0_15
 guint64
 garrow_chunked_array_get_n_rows(GArrowChunkedArray *chunked_array);
+
+GARROW_AVAILABLE_IN_ALL
 guint64
 garrow_chunked_array_get_n_nulls(GArrowChunkedArray *chunked_array);
+
+GARROW_AVAILABLE_IN_ALL
 guint
 garrow_chunked_array_get_n_chunks(GArrowChunkedArray *chunked_array);

+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_chunked_array_get_chunk(GArrowChunkedArray *chunked_array, guint i);
+
+GARROW_AVAILABLE_IN_ALL
 GList *
 garrow_chunked_array_get_chunks(GArrowChunkedArray *chunked_array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowChunkedArray *
 garrow_chunked_array_slice(GArrowChunkedArray *chunked_array,
                            guint64 offset,
                            guint64 length);
+
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_chunked_array_to_string(GArrowChunkedArray *chunked_array, GError **error);
+
 GARROW_AVAILABLE_IN_4_0
 GArrowArray *
 garrow_chunked_array_combine(GArrowChunkedArray *chunked_array, GError **error);
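Editor's note: the .hpp hunks above add GARROW_EXTERN to the C++ "raw" conversion helpers. These functions cross the GLib-wrapper/Arrow-C++ boundary and are consumed outside libarrow-glib itself (for example by sibling GLib bindings in the same tree), so once the library is built with hidden default visibility they need an explicit export. A hedged usage sketch follows; wrap_buffer is a hypothetical caller, not part of the PR:

// Sketch only: external C++ code converting between an arrow::Buffer and
// its GLib wrapper through the now-exported raw helper.
#include <memory>
#include <arrow-glib/buffer.hpp>

GArrowBuffer *
wrap_buffer(std::shared_ptr<arrow::Buffer> arrow_buffer)
{
  // garrow_buffer_new_raw() takes a pointer to the shared_ptr and copies it,
  // so the reference count keeps the Arrow buffer alive for the wrapper.
  return garrow_buffer_new_raw(&arrow_buffer);
}

Without GARROW_EXTERN on the declaration, a build with -fvisibility=hidden would keep garrow_buffer_new_raw out of the shared library's dynamic symbol table and this call would fail to link.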
diff --git a/c_glib/arrow-glib/chunked-array.hpp b/c_glib/arrow-glib/chunked-array.hpp
index 9ce6cc76adfbf..674ef9606b96e 100644
--- a/c_glib/arrow-glib/chunked-array.hpp
+++ b/c_glib/arrow-glib/chunked-array.hpp
@@ -23,10 +23,15 @@

 #include

+GARROW_EXTERN
 GArrowChunkedArray *
 garrow_chunked_array_new_raw(std::shared_ptr<arrow::ChunkedArray> *arrow_chunked_array);
+
+GARROW_EXTERN
 GArrowChunkedArray *
 garrow_chunked_array_new_raw(std::shared_ptr<arrow::ChunkedArray> *arrow_chunked_array,
                              GArrowDataType *data_type);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::ChunkedArray>
 garrow_chunked_array_get_raw(GArrowChunkedArray *chunked_array);
diff --git a/c_glib/arrow-glib/codec.h b/c_glib/arrow-glib/codec.h
index 9b8611bb0a7ee..5865634a7d8e4 100644
--- a/c_glib/arrow-glib/codec.h
+++ b/c_glib/arrow-glib/codec.h
@@ -50,20 +50,25 @@ typedef enum {
 } GArrowCompressionType;

 #define GARROW_TYPE_CODEC (garrow_codec_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowCodec, garrow_codec, GARROW, CODEC, GObject)
 struct _GArrowCodecClass
 {
   GObjectClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowCodec *
 garrow_codec_new(GArrowCompressionType type, GError **error);

+GARROW_AVAILABLE_IN_ALL
 const gchar *
 garrow_codec_get_name(GArrowCodec *codec);
+
 GARROW_AVAILABLE_IN_2_0
 GArrowCompressionType
 garrow_codec_get_compression_type(GArrowCodec *codec);
+
 GARROW_AVAILABLE_IN_2_0
 gint
 garrow_codec_get_compression_level(GArrowCodec *codec);
diff --git a/c_glib/arrow-glib/codec.hpp b/c_glib/arrow-glib/codec.hpp
index f4cfaba18a00e..baea842ddf6b5 100644
--- a/c_glib/arrow-glib/codec.hpp
+++ b/c_glib/arrow-glib/codec.hpp
@@ -23,12 +23,18 @@

 #include

+GARROW_EXTERN
 GArrowCompressionType
 garrow_compression_type_from_raw(arrow::Compression::type arrow_type);
+
+GARROW_EXTERN
 arrow::Compression::type
 garrow_compression_type_to_raw(GArrowCompressionType type);

+GARROW_EXTERN
 GArrowCodec *
 garrow_codec_new_raw(std::shared_ptr<arrow::util::Codec> *arrow_codec);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::util::Codec>
 garrow_codec_get_raw(GArrowCodec *codec);
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index c6e19f1c74e22..b8ba901363d0a 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -27,6 +27,7 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_LIST_ARRAY (garrow_list_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowListArray, garrow_list_array, GARROW, LIST_ARRAY, GArrowArray)
 struct _GArrowListArrayClass
@@ -34,6 +35,7 @@ struct _GArrowListArrayClass
   GArrowArrayClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowListArray *
 garrow_list_array_new(GArrowDataType *data_type,
                       gint64 length,
@@ -42,24 +44,32 @@ garrow_list_array_new(GArrowDataType *data_type,
                       GArrowBuffer *null_bitmap,
                       gint64 n_nulls);

+GARROW_AVAILABLE_IN_ALL
 GArrowDataType *
 garrow_list_array_get_value_type(GArrowListArray *array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_list_array_get_value(GArrowListArray *array, gint64 i);
+
 GARROW_AVAILABLE_IN_2_0
 GArrowArray *
 garrow_list_array_get_values(GArrowListArray *array);
+
 GARROW_AVAILABLE_IN_2_0
 gint32
 garrow_list_array_get_value_offset(GArrowListArray *array, gint64 i);
+
 GARROW_AVAILABLE_IN_2_0
 gint32
 garrow_list_array_get_value_length(GArrowListArray *array, gint64 i);
+
 GARROW_AVAILABLE_IN_2_0
 const gint32 *
 garrow_list_array_get_value_offsets(GArrowListArray *array, gint64 *n_offsets);

 #define GARROW_TYPE_LARGE_LIST_ARRAY (garrow_large_list_array_get_type())
+GARROW_AVAILABLE_IN_0_16
 G_DECLARE_DERIVABLE_TYPE(
   GArrowLargeListArray, garrow_large_list_array, GARROW, LARGE_LIST_ARRAY, GArrowArray)
 struct _GArrowLargeListArrayClass
@@ -79,23 +89,29 @@ garrow_large_list_array_new(GArrowDataType *data_type,
 GARROW_AVAILABLE_IN_0_16
 GArrowDataType *
 garrow_large_list_array_get_value_type(GArrowLargeListArray *array);
+
 GARROW_AVAILABLE_IN_0_16
 GArrowArray *
 garrow_large_list_array_get_value(GArrowLargeListArray *array, gint64 i);
+
 GARROW_AVAILABLE_IN_2_0
 GArrowArray *
 garrow_large_list_array_get_values(GArrowLargeListArray *array);
+
 GARROW_AVAILABLE_IN_2_0
 gint64
 garrow_large_list_array_get_value_offset(GArrowLargeListArray *array, gint64 i);
+
 GARROW_AVAILABLE_IN_2_0
 gint64
 garrow_large_list_array_get_value_length(GArrowLargeListArray *array, gint64 i);
+
 GARROW_AVAILABLE_IN_2_0
 const gint64 *
 garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array,
                                           gint64 *n_offsets);

 #define GARROW_TYPE_STRUCT_ARRAY (garrow_struct_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowStructArray, garrow_struct_array, GARROW, STRUCT_ARRAY, GArrowArray)
 struct _GArrowStructArrayClass
@@ -103,6 +119,7 @@ struct _GArrowStructArrayClass
   GArrowArrayClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowStructArray *
 garrow_struct_array_new(GArrowDataType *data_type,
                         gint64 length,
@@ -110,9 +127,11 @@ garrow_struct_array_new(GArrowDataType *data_type,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);

+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_struct_array_get_field(GArrowStructArray *array, gint i);

+GARROW_AVAILABLE_IN_ALL
 GList *
 garrow_struct_array_get_fields(GArrowStructArray *array);

@@ -121,6 +140,7 @@ GList *
 garrow_struct_array_flatten(GArrowStructArray *array, GError **error);

 #define GARROW_TYPE_MAP_ARRAY (garrow_map_array_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(
   GArrowMapArray, garrow_map_array, GARROW, MAP_ARRAY, GArrowListArray)
 struct _GArrowMapArrayClass
@@ -134,14 +154,17 @@ garrow_map_array_new(GArrowArray *offsets,
                      GArrowArray *keys,
                      GArrowArray *items,
                      GError **error);
+
 GARROW_AVAILABLE_IN_0_17
 GArrowArray *
 garrow_map_array_get_keys(GArrowMapArray *array);
+
 GARROW_AVAILABLE_IN_0_17
 GArrowArray *
 garrow_map_array_get_items(GArrowMapArray *array);

 #define GARROW_TYPE_UNION_ARRAY (garrow_union_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUnionArray, garrow_union_array, GARROW, UNION_ARRAY, GArrowArray)
 struct _GArrowUnionArrayClass
@@ -152,13 +175,17 @@ struct _GArrowUnionArrayClass
 GARROW_AVAILABLE_IN_12_0
 gint8
 garrow_union_array_get_type_code(GArrowUnionArray *array, gint64 i);
+
 GARROW_AVAILABLE_IN_12_0
 gint
 garrow_union_array_get_child_id(GArrowUnionArray *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_union_array_get_field(GArrowUnionArray *array, gint i);

 #define GARROW_TYPE_SPARSE_UNION_ARRAY (garrow_sparse_union_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArray,
                          garrow_sparse_union_array,
                          GARROW,
@@ -169,8 +196,11 @@ struct _GArrowSparseUnionArrayClass
   GArrowUnionArrayClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowSparseUnionArray *
 garrow_sparse_union_array_new(GArrowInt8Array *type_ids, GList *fields, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowSparseUnionArray *
 garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
                                         GArrowInt8Array *type_ids,
@@ -178,6 +208,7 @@ garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
                                         GError **error);

 #define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionArray,
                          garrow_dense_union_array,
                          GARROW,
@@ -188,22 +219,27 @@ struct _GArrowDenseUnionArrayClass
   GArrowUnionArrayClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDenseUnionArray *
 garrow_dense_union_array_new(GArrowInt8Array *type_ids,
                              GArrowInt32Array *value_offsets,
                              GList *fields,
                              GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDenseUnionArray *
 garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type,
                                        GArrowInt8Array *type_ids,
                                        GArrowInt32Array *value_offsets,
                                        GList *fields,
                                        GError **error);
+
 GARROW_AVAILABLE_IN_12_0
 gint32
 garrow_dense_union_array_get_value_offset(GArrowDenseUnionArray *array, gint64 i);

 #define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowDictionaryArray, garrow_dictionary_array, GARROW, DICTIONARY_ARRAY, GArrowArray)
 struct _GArrowDictionaryArrayClass
@@ -211,22 +247,29 @@ struct _GArrowDictionaryArrayClass
   GArrowArrayClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDictionaryArray *
 garrow_dictionary_array_new(GArrowDataType *data_type,
                             GArrowArray *indices,
                             GArrowArray *dictionary,
                             GError **error);

+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_dictionary_array_get_indices(GArrowDictionaryArray *array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_1_0_FOR(garrow_array_get_value_data_type)
 GArrowDictionaryDataType *
 garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array);
 #endif

 #define GARROW_TYPE_RUN_END_ENCODED_ARRAY (garrow_run_end_encoded_array_get_type())
+GARROW_AVAILABLE_IN_13_0
 G_DECLARE_DERIVABLE_TYPE(GArrowRunEndEncodedArray,
                          garrow_run_end_encoded_array,
                          GARROW,
@@ -248,9 +291,11 @@ garrow_run_end_encoded_array_new(GArrowDataType *data_type,
 GARROW_AVAILABLE_IN_13_0
 GArrowArray *
 garrow_run_end_encoded_array_get_run_ends(GArrowRunEndEncodedArray *array);
+
 GARROW_AVAILABLE_IN_13_0
 GArrowArray *
 garrow_run_end_encoded_array_get_values(GArrowRunEndEncodedArray *array);
+
 GARROW_AVAILABLE_IN_13_0
 GArrowArray *
 garrow_run_end_encoded_array_get_logical_run_ends(GArrowRunEndEncodedArray *array,
@@ -258,9 +303,11 @@ garrow_run_end_encoded_array_get_logical_run_ends(GArrowRunEndEncodedArray *arra
 GARROW_AVAILABLE_IN_13_0
 GArrowArray *
 garrow_run_end_encoded_array_get_logical_values(GArrowRunEndEncodedArray *array);
+
 GARROW_AVAILABLE_IN_13_0
 gint64
 garrow_run_end_encoded_array_find_physical_offset(GArrowRunEndEncodedArray *array);
+
 GARROW_AVAILABLE_IN_13_0
 gint64
 garrow_run_end_encoded_array_find_physical_length(GArrowRunEndEncodedArray *array);
diff --git a/c_glib/arrow-glib/composite-data-type.h b/c_glib/arrow-glib/composite-data-type.h
index e71d277a305c6..7a0a462af00f9 100644
--- a/c_glib/arrow-glib/composite-data-type.h
+++ b/c_glib/arrow-glib/composite-data-type.h
@@ -27,6 +27,7 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_LIST_DATA_TYPE (garrow_list_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowListDataType, garrow_list_data_type, GARROW, LIST_DATA_TYPE, GArrowDataType)
 struct _GArrowListDataTypeClass
@@ -34,18 +35,23 @@ struct _GArrowListDataTypeClass
   GArrowDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowListDataType *
 garrow_list_data_type_new(GArrowField *field);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_13_FOR(garrow_list_data_type_get_field)
 GArrowField *
 garrow_list_data_type_get_value_field(GArrowListDataType *list_data_type);
 #endif
+
 GARROW_AVAILABLE_IN_0_13
 GArrowField *
 garrow_list_data_type_get_field(GArrowListDataType *list_data_type);

 #define GARROW_TYPE_LARGE_LIST_DATA_TYPE (garrow_large_list_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeListDataType,
                          garrow_large_list_data_type,
                          GARROW,
@@ -59,11 +65,13 @@ struct _GArrowLargeListDataTypeClass
 GARROW_AVAILABLE_IN_0_16
 GArrowLargeListDataType *
 garrow_large_list_data_type_new(GArrowField *field);
+
 GARROW_AVAILABLE_IN_0_16
 GArrowField *
 garrow_large_list_data_type_get_field(GArrowLargeListDataType *large_list_data_type);

 #define GARROW_TYPE_STRUCT_DATA_TYPE (garrow_struct_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowStructDataType, garrow_struct_data_type, GARROW, STRUCT_DATA_TYPE, GArrowDataType)
 struct _GArrowStructDataTypeClass
@@ -71,22 +79,34 @@ struct _GArrowStructDataTypeClass
   GArrowDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowStructDataType *
 garrow_struct_data_type_new(GList *fields);
+
+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_struct_data_type_get_n_fields(GArrowStructDataType *struct_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 GList *
 garrow_struct_data_type_get_fields(GArrowStructDataType *struct_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_struct_data_type_get_field(GArrowStructDataType *struct_data_type, gint i);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_struct_data_type_get_field_by_name(GArrowStructDataType *struct_data_type,
                                           const gchar *name);
+
+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_struct_data_type_get_field_index(GArrowStructDataType *struct_data_type,
                                         const gchar *name);

 #define GARROW_TYPE_MAP_DATA_TYPE (garrow_map_data_type_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(
   GArrowMapDataType, garrow_map_data_type, GARROW, MAP_DATA_TYPE, GArrowListDataType)
 struct _GArrowMapDataTypeClass
@@ -105,6 +125,7 @@ GArrowDataType *
 garrow_map_data_type_get_item_type(GArrowMapDataType *map_data_type);

 #define GARROW_TYPE_UNION_DATA_TYPE (garrow_union_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUnionDataType, garrow_union_data_type, GARROW, UNION_DATA_TYPE, GArrowDataType)
 struct _GArrowUnionDataTypeClass
@@ -112,17 +133,25 @@ struct _GArrowUnionDataTypeClass
   GArrowDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_union_data_type_get_n_fields(GArrowUnionDataType *union_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 GList *
 garrow_union_data_type_get_fields(GArrowUnionDataType *union_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_union_data_type_get_field(GArrowUnionDataType *union_data_type, gint i);
+
+GARROW_AVAILABLE_IN_ALL
 gint8 *
 garrow_union_data_type_get_type_codes(GArrowUnionDataType *union_data_type,
                                       gsize *n_type_codes);

 #define GARROW_TYPE_SPARSE_UNION_DATA_TYPE (garrow_sparse_union_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionDataType,
                          garrow_sparse_union_data_type,
                          GARROW,
@@ -133,10 +162,12 @@ struct _GArrowSparseUnionDataTypeClass
   GArrowUnionDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowSparseUnionDataType *
 garrow_sparse_union_data_type_new(GList *fields, gint8 *type_codes, gsize n_type_codes);

 #define GARROW_TYPE_DENSE_UNION_DATA_TYPE (garrow_dense_union_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionDataType,
                          garrow_dense_union_data_type,
                          GARROW,
@@ -147,10 +178,12 @@ struct _GArrowDenseUnionDataTypeClass
   GArrowUnionDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDenseUnionDataType *
 garrow_dense_union_data_type_new(GList *fields, gint8 *type_codes, gsize n_type_codes);

 #define GARROW_TYPE_DICTIONARY_DATA_TYPE (garrow_dictionary_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryDataType,
                          garrow_dictionary_data_type,
                          GARROW,
@@ -161,22 +194,29 @@ struct _GArrowDictionaryDataTypeClass
   GArrowFixedWidthDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDictionaryDataType *
 garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
                                 GArrowDataType *value_data_type,
                                 gboolean ordered);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDataType *
 garrow_dictionary_data_type_get_index_data_type(
   GArrowDictionaryDataType *dictionary_data_type);
+
 GARROW_AVAILABLE_IN_0_14
 GArrowDataType *
 garrow_dictionary_data_type_get_value_data_type(
   GArrowDictionaryDataType *dictionary_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *dictionary_data_type);

 #define GARROW_TYPE_RUN_END_ENCODED_DATA_TYPE \
   (garrow_run_end_encoded_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowRunEndEncodedDataType,
                          garrow_run_end_encoded_data_type,
                          GARROW,
@@ -195,6 +235,7 @@ GARROW_AVAILABLE_IN_13_0
 GArrowDataType *
 garrow_run_end_encoded_data_type_get_run_end_data_type(
   GArrowRunEndEncodedDataType *data_type);
+
 GARROW_AVAILABLE_IN_13_0
 GArrowDataType *
 garrow_run_end_encoded_data_type_get_value_data_type(
diff --git a/c_glib/arrow-glib/compute-definition.h b/c_glib/arrow-glib/compute-definition.h
index b699e9e99a9fc..a060f16f62cf6 100644
--- a/c_glib/arrow-glib/compute-definition.h
+++ b/c_glib/arrow-glib/compute-definition.h
@@ -21,9 +21,12 @@

 #include

+#include <arrow-glib/version.h>
+
 G_BEGIN_DECLS

 #define GARROW_TYPE_FUNCTION_OPTIONS (garrow_function_options_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowFunctionOptions, garrow_function_options, GARROW, FUNCTION_OPTIONS, GObject)
 struct _GArrowFunctionOptionsClass
@@ -32,6 +35,7 @@ struct _GArrowFunctionOptionsClass
 };

 #define GARROW_TYPE_CAST_OPTIONS (garrow_cast_options_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowCastOptions, garrow_cast_options, GARROW, CAST_OPTIONS, GArrowFunctionOptions)
 struct _GArrowCastOptionsClass
@@ -40,6 +44,7 @@ struct _GArrowCastOptionsClass
 };

 #define GARROW_TYPE_EXPRESSION (garrow_expression_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowExpression, garrow_expression, GARROW, EXPRESSION, GObject)
 struct _GArrowExpressionClass
 {
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 029cab136ad8f..54b0ddb014fbb 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -26,6 +26,7 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_EXECUTE_CONTEXT (garrow_execute_context_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowExecuteContext, garrow_execute_context, GARROW, EXECUTE_CONTEXT, GObject)
 struct _GArrowExecuteContextClass
@@ -46,6 +47,7 @@ gchar *
 garrow_function_options_to_string(GArrowFunctionOptions *options);

 #define GARROW_TYPE_FUNCTION_DOC (garrow_function_doc_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowFunctionDoc, garrow_function_doc, GARROW, FUNCTION_DOC, GObject)
 struct _GArrowFunctionDocClass
@@ -67,6 +69,7 @@ gchar *
 garrow_function_doc_get_options_class_name(GArrowFunctionDoc *doc);

 #define GARROW_TYPE_FUNCTION (garrow_function_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(GArrowFunction, garrow_function, GARROW, FUNCTION, GObject)
 struct _GArrowFunctionClass
 {
@@ -110,6 +113,7 @@ gchar *
 garrow_function_to_string(GArrowFunction *function);

 #define GARROW_TYPE_EXECUTE_NODE_OPTIONS (garrow_execute_node_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowExecuteNodeOptions,
                          garrow_execute_node_options,
                          GARROW,
@@ -121,6 +125,7 @@ struct _GArrowExecuteNodeOptionsClass
 };

 #define GARROW_TYPE_SOURCE_NODE_OPTIONS (garrow_source_node_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSourceNodeOptions,
                          garrow_source_node_options,
                          GARROW,
@@ -142,6 +147,7 @@ GArrowSourceNodeOptions *
 garrow_source_node_options_new_table(GArrowTable *table);

 #define GARROW_TYPE_FILTER_NODE_OPTIONS (garrow_filter_node_options_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(GArrowFilterNodeOptions,
                          garrow_filter_node_options,
                          GARROW,
@@ -157,6 +163,7 @@ GArrowFilterNodeOptions *
 garrow_filter_node_options_new(GArrowExpression *expression);

 #define GARROW_TYPE_PROJECT_NODE_OPTIONS (garrow_project_node_options_get_type())
+GARROW_AVAILABLE_IN_11_0
 G_DECLARE_DERIVABLE_TYPE(GArrowProjectNodeOptions,
                          garrow_project_node_options,
                          GARROW,
@@ -172,6 +179,7 @@ GArrowProjectNodeOptions *
 garrow_project_node_options_new(GList *expressions, gchar **names, gsize n_names);

 #define GARROW_TYPE_AGGREGATION (garrow_aggregation_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowAggregation, garrow_aggregation, GARROW, AGGREGATION, GObject)
 struct _GArrowAggregationClass
@@ -187,6 +195,7 @@ garrow_aggregation_new(const gchar *function,
                        const gchar *output);

 #define GARROW_TYPE_AGGREGATE_NODE_OPTIONS (garrow_aggregate_node_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowAggregateNodeOptions,
                          garrow_aggregate_node_options,
                          GARROW,
@@ -205,6 +214,7 @@ garrow_aggregate_node_options_new(GList *aggregations,
                                   GError **error);

 #define GARROW_TYPE_SINK_NODE_OPTIONS (garrow_sink_node_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSinkNodeOptions,
                          garrow_sink_node_options,
                          GARROW,
@@ -249,6 +259,7 @@ typedef enum {
 } GArrowJoinType;

 #define GARROW_TYPE_HASH_JOIN_NODE_OPTIONS (garrow_hash_join_node_options_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowHashJoinNodeOptions,
                          garrow_hash_join_node_options,
                          GARROW,
@@ -281,6 +292,7 @@ garrow_hash_join_node_options_set_right_outputs(GArrowHashJoinNodeOptions *optio
                                                 GError **error);

 #define GARROW_TYPE_EXECUTE_NODE (garrow_execute_node_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowExecuteNode, garrow_execute_node, GARROW, EXECUTE_NODE, GObject)
 struct _GArrowExecuteNodeClass
@@ -296,6 +308,7 @@ GArrowSchema *
 garrow_execute_node_get_output_schema(GArrowExecuteNode *node);

 #define GARROW_TYPE_EXECUTE_PLAN (garrow_execute_plan_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowExecutePlan, garrow_execute_plan, GARROW, EXECUTE_PLAN, GObject)
 struct _GArrowExecutePlanClass
@@ -365,10 +378,12 @@ GARROW_AVAILABLE_IN_6_0
 gboolean
 garrow_execute_plan_wait(GArrowExecutePlan *plan, GError **error);

+GARROW_AVAILABLE_IN_ALL
 GArrowCastOptions *
 garrow_cast_options_new(void);

 #define GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS (garrow_scalar_aggregate_options_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowScalarAggregateOptions,
                          garrow_scalar_aggregate_options,
                          GARROW,
@@ -401,6 +416,7 @@ typedef enum {
 } GArrowCountMode;

 #define GARROW_TYPE_COUNT_OPTIONS (garrow_count_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowCountOptions, garrow_count_options, GARROW, COUNT_OPTIONS, GArrowFunctionOptions)
 struct _GArrowCountOptionsClass
@@ -428,6 +444,7 @@ typedef enum {
 } GArrowFilterNullSelectionBehavior;

 #define GARROW_TYPE_FILTER_OPTIONS (garrow_filter_options_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowFilterOptions,
                          garrow_filter_options,
                          GARROW,
@@ -443,6 +460,7 @@ GArrowFilterOptions *
 garrow_filter_options_new(void);

 #define GARROW_TYPE_TAKE_OPTIONS (garrow_take_options_get_type())
+GARROW_AVAILABLE_IN_0_14
 G_DECLARE_DERIVABLE_TYPE(
   GArrowTakeOptions, garrow_take_options, GARROW, TAKE_OPTIONS, GArrowFunctionOptions)
 struct _GArrowTakeOptionsClass
@@ -487,6 +505,7 @@ typedef enum /**/ {
 } GArrowNullPlacement;

 #define GARROW_TYPE_ARRAY_SORT_OPTIONS (garrow_array_sort_options_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowArraySortOptions,
                          garrow_array_sort_options,
                          GARROW,
@@ -506,6 +525,7 @@ garrow_array_sort_options_equal(GArrowArraySortOptions *options,
                                 GArrowArraySortOptions *other_options);

 #define GARROW_TYPE_SORT_KEY (garrow_sort_key_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSortKey, garrow_sort_key, GARROW, SORT_KEY, GObject)
 struct _GArrowSortKeyClass
 {
@@ -521,6 +541,7 @@ gboolean
 garrow_sort_key_equal(GArrowSortKey *sort_key, GArrowSortKey *other_sort_key);

 #define GARROW_TYPE_SORT_OPTIONS (garrow_sort_options_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowSortOptions, garrow_sort_options, GARROW, SORT_OPTIONS, GArrowFunctionOptions)
 struct _GArrowSortOptionsClass
@@ -545,6 +566,7 @@ void
 garrow_sort_options_add_sort_key(GArrowSortOptions *options, GArrowSortKey *sort_key);

 #define GARROW_TYPE_SET_LOOKUP_OPTIONS (garrow_set_lookup_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSetLookupOptions,
                          garrow_set_lookup_options,
                          GARROW,
@@ -560,6 +582,7 @@ GArrowSetLookupOptions *
 garrow_set_lookup_options_new(GArrowDatum *value_set);

 #define GARROW_TYPE_VARIANCE_OPTIONS (garrow_variance_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowVarianceOptions,
                          garrow_variance_options,
                          GARROW,
@@ -620,6 +643,7 @@ typedef enum {
 } GArrowRoundMode;

 #define GARROW_TYPE_ROUND_OPTIONS (garrow_round_options_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowRoundOptions, garrow_round_options, GARROW, ROUND_OPTIONS, GArrowFunctionOptions)
 struct _GArrowRoundOptionsClass
@@ -633,6 +657,7 @@ garrow_round_options_new(void);

 #define GARROW_TYPE_ROUND_TO_MULTIPLE_OPTIONS \
   (garrow_round_to_multiple_options_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowRoundToMultipleOptions,
                          garrow_round_to_multiple_options,
                          GARROW,
@@ -648,6 +673,7 @@ GArrowRoundToMultipleOptions *
 garrow_round_to_multiple_options_new(void);

 #define GARROW_TYPE_MATCH_SUBSTRING_OPTIONS (garrow_match_substring_options_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMatchSubstringOptions,
                          garrow_match_substring_options,
                          GARROW,
@@ -683,6 +709,7 @@ typedef enum /*< underscore_name=garrow_utf8_normalize_form >*/ {
 } GArrowUTF8NormalizeForm;

 #define GARROW_TYPE_UTF8_NORMALIZE_OPTIONS (garrow_utf8_normalize_options_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowUTF8NormalizeOptions,
                          garrow_utf8_normalize_options,
                          GARROW,
@@ -719,6 +746,7 @@ typedef enum {
 } GArrowQuantileInterpolation;

 #define GARROW_TYPE_QUANTILE_OPTIONS (garrow_quantile_options_get_type())
+GARROW_AVAILABLE_IN_9_0
 G_DECLARE_DERIVABLE_TYPE(GArrowQuantileOptions,
                          garrow_quantile_options,
                          GARROW,
@@ -745,6 +773,7 @@ garrow_quantile_options_set_qs(GArrowQuantileOptions *options,
                                gsize n);

 #define GARROW_TYPE_INDEX_OPTIONS (garrow_index_options_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowIndexOptions, garrow_index_options, GARROW, INDEX_OPTIONS, GArrowFunctionOptions)
 struct _GArrowIndexOptionsClass
@@ -782,6 +811,7 @@ typedef enum {
 } GArrowRankTiebreaker;

 #define GARROW_TYPE_RANK_OPTIONS (garrow_rank_options_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowRankOptions, garrow_rank_options, GARROW, RANK_OPTIONS, GArrowFunctionOptions)
 struct _GArrowRankOptionsClass
@@ -805,18 +835,25 @@ GARROW_AVAILABLE_IN_12_0
 void
 garrow_rank_options_add_sort_key(GArrowRankOptions *options, GArrowSortKey *sort_key);

+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_array_cast(GArrowArray *array,
                   GArrowDataType *target_data_type,
                   GArrowCastOptions *options,
                   GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_array_unique(GArrowArray *array, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDictionaryArray *
 garrow_array_dictionary_encode(GArrowArray *array, GError **error);
+
 GARROW_AVAILABLE_IN_0_13
 gint64
 garrow_array_count(GArrowArray *array, GArrowCountOptions *options, GError **error);
+
 GARROW_AVAILABLE_IN_0_13
 GArrowStructArray *
 garrow_array_count_values(GArrowArray *array, GError **error);
@@ -987,6 +1024,7 @@ garrow_record_batch_filter(GArrowRecordBatch *record_batch,
                            GError **error);

 #define GARROW_TYPE_RUN_END_ENCODE_OPTIONS (garrow_run_end_encode_options_get_type())
+GARROW_AVAILABLE_IN_13_0
 G_DECLARE_DERIVABLE_TYPE(GArrowRunEndEncodeOptions,
                          garrow_run_end_encode_options,
                          GARROW,
@@ -1011,6 +1049,7 @@ GArrowArray *
 garrow_run_end_encoded_array_decode(GArrowRunEndEncodedArray *array, GError **error);

 #define GARROW_TYPE_STRPTIME_OPTIONS (garrow_strptime_options_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(GArrowStrptimeOptions,
                          garrow_strptime_options,
                          GARROW,
@@ -1026,6 +1065,7 @@ GArrowStrptimeOptions *
 garrow_strptime_options_new(void);

 #define GARROW_TYPE_STRFTIME_OPTIONS (garrow_strftime_options_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(GArrowStrftimeOptions,
                          garrow_strftime_options,
                          GARROW,
@@ -1041,6 +1081,7 @@ GArrowStrftimeOptions *
 garrow_strftime_options_new(void);

 #define GARROW_TYPE_SPLIT_PATTERN_OPTIONS (garrow_split_pattern_options_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSplitPatternOptions,
                          garrow_split_pattern_options,
                          GARROW,
@@ -1056,6 +1097,7 @@ GArrowSplitPatternOptions *
 garrow_split_pattern_options_new(void);

 #define GARROW_TYPE_STRUCT_FIELD_OPTIONS (garrow_struct_field_options_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(GArrowStructFieldOptions,
                          garrow_struct_field_options,
                          GARROW,
diff --git a/c_glib/arrow-glib/datum.h b/c_glib/arrow-glib/datum.h
index df5e9a1c2cf4f..fc9a2fe7ab907 100644
--- a/c_glib/arrow-glib/datum.h
+++ b/c_glib/arrow-glib/datum.h
@@ -28,6 +28,7 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_DATUM (garrow_datum_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDatum, garrow_datum, GARROW, DATUM, GObject)
 struct _GArrowDatumClass
 {
@@ -60,6 +61,7 @@ garrow_datum_to_string(GArrowDatum *datum);
 /* GARROW_TYPE_NONE_DATUM */

 #define GARROW_TYPE_SCALAR_DATUM (garrow_scalar_datum_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowScalarDatum, garrow_scalar_datum, GARROW, SCALAR_DATUM, GArrowDatum)
 struct _GArrowScalarDatumClass
@@ -72,6 +74,7 @@ GArrowScalarDatum *
 garrow_scalar_datum_new(GArrowScalar *value);

 #define GARROW_TYPE_ARRAY_DATUM (garrow_array_datum_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowArrayDatum, garrow_array_datum, GARROW, ARRAY_DATUM, GArrowDatum)
 struct _GArrowArrayDatumClass
@@ -84,6 +87,7 @@ GArrowArrayDatum *
 garrow_array_datum_new(GArrowArray *value);

 #define GARROW_TYPE_CHUNKED_ARRAY_DATUM (garrow_chunked_array_datum_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(GArrowChunkedArrayDatum,
                          garrow_chunked_array_datum,
                          GARROW,
@@ -99,6 +103,7 @@ GArrowChunkedArrayDatum *
 garrow_chunked_array_datum_new(GArrowChunkedArray *value);

 #define GARROW_TYPE_RECORD_BATCH_DATUM (garrow_record_batch_datum_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchDatum,
                          garrow_record_batch_datum,
                          GARROW,
@@ -114,6 +119,7 @@ GArrowRecordBatchDatum *
 garrow_record_batch_datum_new(GArrowRecordBatch *value);

 #define GARROW_TYPE_TABLE_DATUM (garrow_table_datum_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowTableDatum, garrow_table_datum, GARROW, TABLE_DATUM, GArrowDatum)
 struct _GArrowTableDatumClass
diff --git a/c_glib/arrow-glib/decimal.h b/c_glib/arrow-glib/decimal.h
index b967fa36d5611..f64afa800a19b 100644
--- a/c_glib/arrow-glib/decimal.h
+++ b/c_glib/arrow-glib/decimal.h
@@ -27,6 +27,7 @@ G_BEGIN_DECLS

 /* Disabled because it conflicts with GARROW_TYPE_DECIMAL128 in GArrowType. */
 /* #define GARROW_TYPE_DECIMAL128 (garrow_decimal128_get_type()) */
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128, garrow_decimal128, GARROW, DECIMAL128, GObject)
 struct _GArrowDecimal128Class
@@ -34,8 +35,10 @@ struct _GArrowDecimal128Class
   GObjectClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_new_string(const gchar *data, GError **error);
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_new_integer(const gint64 data);
 GARROW_AVAILABLE_IN_3_0
@@ -62,25 +65,34 @@ GARROW_AVAILABLE_IN_0_12
 gboolean
 garrow_decimal128_greater_than_or_equal(GArrowDecimal128 *decimal,
                                         GArrowDecimal128 *other_decimal);
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_decimal128_to_string_scale(GArrowDecimal128 *decimal, gint32 scale);
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_decimal128_to_string(GArrowDecimal128 *decimal);
 GARROW_AVAILABLE_IN_3_0
 GBytes *
 garrow_decimal128_to_bytes(GArrowDecimal128 *decimal);
+GARROW_AVAILABLE_IN_ALL
 void
 garrow_decimal128_abs(GArrowDecimal128 *decimal);
+GARROW_AVAILABLE_IN_ALL
 void
 garrow_decimal128_negate(GArrowDecimal128 *decimal);
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_decimal128_to_integer(GArrowDecimal128 *decimal);
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_plus(GArrowDecimal128 *left, GArrowDecimal128 *right);
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_minus(GArrowDecimal128 *left, GArrowDecimal128 *right);
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_multiply(GArrowDecimal128 *left, GArrowDecimal128 *right);
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_divide(GArrowDecimal128 *left,
                          GArrowDecimal128 *right,
@@ -95,6 +107,7 @@ garrow_decimal128_rescale(GArrowDecimal128 *decimal,

 /* Disabled because it conflicts with GARROW_TYPE_DECIMAL256 in GArrowType. */
 /* #define GARROW_TYPE_DECIMAL256 (garrow_decimal256_get_type()) */
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256, garrow_decimal256, GARROW, DECIMAL256, GObject)
 struct _GArrowDecimal256Class
diff --git a/c_glib/arrow-glib/error.h b/c_glib/arrow-glib/error.h
index 4414417a1a25b..e0c6a591a021b 100644
--- a/c_glib/arrow-glib/error.h
+++ b/c_glib/arrow-glib/error.h
@@ -21,6 +21,8 @@

 #include

+#include <arrow-glib/version.h>
+
 G_BEGIN_DECLS

 /**
@@ -66,6 +68,7 @@ typedef enum {

 #define GARROW_ERROR garrow_error_quark()

+GARROW_AVAILABLE_IN_ALL
 GQuark
 garrow_error_quark(void);
diff --git a/c_glib/arrow-glib/error.hpp b/c_glib/arrow-glib/error.hpp
index 90a0f3161878e..c2c9b3c63028a 100644
--- a/c_glib/arrow-glib/error.hpp
+++ b/c_glib/arrow-glib/error.hpp
@@ -23,18 +23,26 @@

 #include

+GARROW_EXTERN
 gboolean
 garrow_error_check(GError **error, const arrow::Status &status, const char *context);
+
+GARROW_EXTERN
 GArrowError
 garrow_error_from_status(const arrow::Status &status);
+
+GARROW_EXTERN
 arrow::StatusCode
 garrow_error_to_status_code(GError *error, arrow::StatusCode default_code);
+
+GARROW_EXTERN
 arrow::Status
 garrow_error_to_status(GError *error,
                        arrow::StatusCode default_code,
                        const char *context);

 namespace garrow {
+  GARROW_EXTERN
   gboolean
   check(GError **error, const arrow::Status &status, const char *context);
diff --git a/c_glib/arrow-glib/expression.h b/c_glib/arrow-glib/expression.h
index 3141ed4df18b7..5a6bfb456fc64 100644
--- a/c_glib/arrow-glib/expression.h
+++ b/c_glib/arrow-glib/expression.h
@@ -31,6 +31,7 @@ gboolean
 garrow_expression_equal(GArrowExpression *expression,
                         GArrowExpression *other_expression);

 #define GARROW_TYPE_LITERAL_EXPRESSION (garrow_literal_expression_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowLiteralExpression,
                          garrow_literal_expression,
                          GARROW,
@@ -46,6 +47,7 @@ GArrowLiteralExpression *
 garrow_literal_expression_new(GArrowDatum *datum);

 #define GARROW_TYPE_FIELD_EXPRESSION (garrow_field_expression_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowFieldExpression,
                          garrow_field_expression,
                          GARROW,
@@ -61,6 +63,7 @@ GArrowFieldExpression *
 garrow_field_expression_new(const gchar *reference, GError **error);

 #define GARROW_TYPE_CALL_EXPRESSION (garrow_call_expression_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowCallExpression, garrow_call_expression, GARROW, CALL_EXPRESSION, GArrowExpression)
 struct _GArrowCallExpressionClass
diff --git a/c_glib/arrow-glib/expression.hpp b/c_glib/arrow-glib/expression.hpp
index 60d5c9fe2f1bd..cc96badbe67aa 100644
--- a/c_glib/arrow-glib/expression.hpp
+++ b/c_glib/arrow-glib/expression.hpp
@@ -23,7 +23,10 @@

 #include

+GARROW_EXTERN
 GArrowExpression *
 garrow_expression_new_raw(const arrow::compute::Expression &arrow_expression);
+
+GARROW_EXTERN
 arrow::compute::Expression *
 garrow_expression_get_raw(GArrowExpression *expression);
diff --git a/c_glib/arrow-glib/field.h b/c_glib/arrow-glib/field.h
index 8de63757878c9..4be13f6135975 100644
--- a/c_glib/arrow-glib/field.h
+++ b/c_glib/arrow-glib/field.h
@@ -24,6 +24,7 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_FIELD (garrow_field_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowField, garrow_field, GARROW, FIELD, GObject)
 struct _GArrowFieldClass
 {
@@ -34,8 +35,10 @@ GARROW_AVAILABLE_IN_6_0
 GArrowField *
 garrow_field_import(gpointer c_abi_schema, GError **error);

+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_field_new(const gchar *name, GArrowDataType *data_type);
+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_field_new_full(const gchar *name, GArrowDataType *data_type, gboolean nullable);
@@ -43,18 +46,26 @@ GARROW_AVAILABLE_IN_6_0
 gpointer
 garrow_field_export(GArrowField *field, GError **error);

+GARROW_AVAILABLE_IN_ALL
 const gchar *
 garrow_field_get_name(GArrowField *field);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDataType *
 garrow_field_get_data_type(GArrowField *field);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_field_is_nullable(GArrowField *field);

+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_field_equal(GArrowField *field, GArrowField *other_field);

+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_field_to_string(GArrowField *field);
+
 GARROW_AVAILABLE_IN_3_0
 gchar *
 garrow_field_to_string_metadata(GArrowField *field, gboolean show_metadata);
diff --git a/c_glib/arrow-glib/file-system.h b/c_glib/arrow-glib/file-system.h
index d3d5fde73fe23..2e500672e145c 100644
--- a/c_glib/arrow-glib/file-system.h
+++ b/c_glib/arrow-glib/file-system.h
@@ -53,6 +53,7 @@ typedef enum {
 /* arrow::fs::FileInfo */

 #define GARROW_TYPE_FILE_INFO (garrow_file_info_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowFileInfo, garrow_file_info, GARROW, FILE_INFO, GObject)
 struct _GArrowFileInfoClass
 {
@@ -80,6 +81,7 @@ garrow_file_info_to_string(GArrowFileInfo *file_info);
 /* arrow::fs::FileSelector */

 #define GARROW_TYPE_FILE_SELECTOR (garrow_file_selector_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(
   GArrowFileSelector, garrow_file_selector, GARROW, FILE_SELECTOR, GObject)
 struct _GArrowFileSelectorClass
@@ -90,6 +92,7 @@ struct _GArrowFileSelectorClass
 /* arrow::fs::FileSystem */

 #define GARROW_TYPE_FILE_SYSTEM (garrow_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(
   GArrowFileSystem, garrow_file_system, GARROW, FILE_SYSTEM, GObject)
 struct _GArrowFileSystemClass
@@ -197,6 +200,7 @@ garrow_file_system_open_append_stream(GArrowFileSystem *file_system,
 /* arrow::fs::SubTreeFileSystem */

 #define GARROW_TYPE_SUB_TREE_FILE_SYSTEM (garrow_sub_tree_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowSubTreeFileSystem,
                          garrow_sub_tree_file_system,
                          GARROW,
@@ -215,6 +219,7 @@ garrow_sub_tree_file_system_new(const gchar *base_path,
 /* arrow::fs::SlowFileSystem */

 #define GARROW_TYPE_SLOW_FILE_SYSTEM (garrow_slow_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowSlowFileSystem,
                          garrow_slow_file_system,
                          GARROW,
@@ -244,6 +249,7 @@ garrow_slow_file_system_new_average_latency_and_seed(GArrowFileSystem *base_file
                                                      gint32 seed);

 #define GARROW_TYPE_MOCK_FILE_SYSTEM (garrow_mock_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowMockFileSystem,
                          garrow_mock_file_system,
                          GARROW,
@@ -255,6 +261,7 @@ struct _GArrowMockFileSystemClass
 };

 #define GARROW_TYPE_HDFS_FILE_SYSTEM (garrow_hdfs_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowHDFSFileSystem,
                          garrow_hdfs_file_system,
                          GARROW,
@@ -290,6 +297,7 @@ typedef enum {
 } GArrowS3LogLevel;

 #define GARROW_TYPE_S3_GLOBAL_OPTIONS (garrow_s3_global_options_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowS3GlobalOptions, garrow_s3_global_options, GARROW, S3_GLOBAL_OPTIONS, GObject)
 struct _GArrowS3GlobalOptionsClass
@@ -312,6 +320,7 @@ gboolean
 garrow_s3_finalize(GError **error);

 #define GARROW_TYPE_S3_FILE_SYSTEM (garrow_s3_file_system_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowS3FileSystem, garrow_s3_file_system, GARROW, S3_FILE_SYSTEM, GArrowFileSystem)
 struct _GArrowS3FileSystemClass
@@ -320,6 +329,7 @@ struct _GArrowS3FileSystemClass
 };

 #define GARROW_TYPE_GCS_FILE_SYSTEM (garrow_gcs_file_system_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowGCSFileSystem, garrow_gcs_file_system, GARROW, GCS_FILE_SYSTEM, GArrowFileSystem)
 struct _GArrowGCSFileSystemClass
diff --git a/c_glib/arrow-glib/file-system.hpp b/c_glib/arrow-glib/file-system.hpp
index f41fc6e9c75b0..c535958301c5c 100644
--- a/c_glib/arrow-glib/file-system.hpp
+++ b/c_glib/arrow-glib/file-system.hpp
@@ -23,28 +23,35 @@

 #include

+GARROW_EXTERN
 GArrowFileInfo *
 garrow_file_info_new_raw(const arrow::fs::FileInfo &arrow_file_info);

+GARROW_EXTERN
 arrow::fs::FileInfo *
 garrow_file_info_get_raw(GArrowFileInfo *file_info);

+GARROW_EXTERN
 GArrowFileSystem *
 garrow_file_system_new_raw(std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system);

+GARROW_EXTERN
 std::shared_ptr<arrow::fs::FileSystem>
 garrow_file_system_get_raw(GArrowFileSystem *file_system);

+GARROW_EXTERN
 GArrowSubTreeFileSystem *
 garrow_sub_tree_file_system_new_raw(
   std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system,
   GArrowFileSystem *base_file_system);

+GARROW_EXTERN
 GArrowSlowFileSystem *
 garrow_slow_file_system_new_raw(std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system,
                                 GArrowFileSystem *base_file_system);

 #ifdef ARROW_S3
+GARROW_EXTERN
 arrow::fs::S3GlobalOptions *
 garrow_s3_global_options_get_raw(GArrowS3GlobalOptions *options);
 #endif
diff --git a/c_glib/arrow-glib/file.h b/c_glib/arrow-glib/file.h
index 42afed139463c..799dd83b9c243 100644
--- a/c_glib/arrow-glib/file.h
+++ b/c_glib/arrow-glib/file.h
@@ -27,15 +27,22 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_FILE (garrow_file_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_INTERFACE(GArrowFile, garrow_file, GARROW, FILE, GObject)

+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_file_close(GArrowFile *file, GError **error);
+
 GARROW_AVAILABLE_IN_0_13
 gboolean
 garrow_file_is_closed(GArrowFile *file);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_file_tell(GArrowFile *file, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowFileMode
 garrow_file_get_mode(GArrowFile *file);
diff --git a/c_glib/arrow-glib/input-stream.cpp b/c_glib/arrow-glib/input-stream.cpp
index 03a3f03fff7ce..52c79993e4ca8 100644
--- a/c_glib/arrow-glib/input-stream.cpp
+++ b/c_glib/arrow-glib/input-stream.cpp
@@ -35,6 +35,22 @@
 #include
 #include

+static std::shared_ptr<arrow::io::FileInterface>
+garrow_input_stream_get_raw_file_interface(GArrowFile *file)
+{
+  auto input_stream = GARROW_INPUT_STREAM(file);
+  auto arrow_input_stream = garrow_input_stream_get_raw(input_stream);
+  return arrow_input_stream;
+}
+
+static std::shared_ptr<arrow::io::Readable>
+garrow_input_stream_get_raw_readable_interface(GArrowReadable *readable)
+{
+  auto input_stream = GARROW_INPUT_STREAM(readable);
+  auto arrow_input_stream = garrow_input_stream_get_raw(input_stream);
+  return arrow_input_stream;
+}
+
 G_BEGIN_DECLS

 /**
@@ -71,28 +87,12 @@ enum {
   PROP_INPUT_STREAM = 1
 };

-static std::shared_ptr<arrow::io::FileInterface>
-garrow_input_stream_get_raw_file_interface(GArrowFile *file)
-{
-  auto input_stream = GARROW_INPUT_STREAM(file);
-  auto arrow_input_stream = garrow_input_stream_get_raw(input_stream);
-  return arrow_input_stream;
-}
-
 static void
 garrow_input_stream_file_interface_init(GArrowFileInterface *iface)
 {
   iface->get_raw = garrow_input_stream_get_raw_file_interface;
 }

-static std::shared_ptr<arrow::io::Readable>
-garrow_input_stream_get_raw_readable_interface(GArrowReadable *readable)
-{
-  auto input_stream = GARROW_INPUT_STREAM(readable);
-  auto arrow_input_stream = garrow_input_stream_get_raw(input_stream);
-  return arrow_input_stream;
-}
-
 static void
 garrow_input_stream_readable_interface_init(GArrowReadableInterface *iface)
 {
diff --git a/c_glib/arrow-glib/input-stream.h b/c_glib/arrow-glib/input-stream.h
index 3e2a2ecdbd4fa..676f2f44b0041 100644
--- a/c_glib/arrow-glib/input-stream.h
+++ b/c_glib/arrow-glib/input-stream.h
@@ -30,6 +30,7 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_INPUT_STREAM (garrow_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowInputStream, garrow_input_stream, GARROW, INPUT_STREAM, GInputStream)
 struct _GArrowInputStreamClass
@@ -37,16 +38,22 @@ struct _GArrowInputStreamClass
   GInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_input_stream_advance(GArrowInputStream *input_stream,
                             gint64 n_bytes,
                             GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_input_stream_align(GArrowInputStream *input_stream,
                           gint32 alignment,
                           GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTensor *
 garrow_input_stream_read_tensor(GArrowInputStream *input_stream, GError **error);
+
 GARROW_AVAILABLE_IN_1_0
 GArrowRecordBatch *
 garrow_input_stream_read_record_batch(GArrowInputStream *input_stream,
@@ -55,6 +62,7 @@ garrow_input_stream_read_record_batch(GArrowInputStream *input_stream,
                                       GError **error);

 #define GARROW_TYPE_SEEKABLE_INPUT_STREAM (garrow_seekable_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowSeekableInputStream,
                          garrow_seekable_input_stream,
                          GARROW,
@@ -65,12 +73,17 @@ struct _GArrowSeekableInputStreamClass
   GArrowInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 guint64
 garrow_seekable_input_stream_get_size(GArrowSeekableInputStream *input_stream,
                                       GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_seekable_input_stream_get_support_zero_copy(
   GArrowSeekableInputStream *input_stream);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_seekable_input_stream_read_at(GArrowSeekableInputStream *input_stream,
                                      gint64 position,
@@ -89,6 +102,7 @@ garrow_seekable_input_stream_peek(GArrowSeekableInputStream *input_stream,
                                   GError **error);

 #define GARROW_TYPE_BUFFER_INPUT_STREAM (garrow_buffer_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowBufferInputStream,
                          garrow_buffer_input_stream,
                          GARROW,
@@ -99,13 +113,16 @@ struct _GArrowBufferInputStreamClass
   GArrowSeekableInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowBufferInputStream *
 garrow_buffer_input_stream_new(GArrowBuffer *buffer);

+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_input_stream_get_buffer(GArrowBufferInputStream *input_stream);

 #define GARROW_TYPE_FILE_INPUT_STREAM (garrow_file_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowFileInputStream,
                          garrow_file_input_stream,
                          GARROW,
@@ -116,15 +133,21 @@ struct _GArrowFileInputStreamClass
   GArrowSeekableInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowFileInputStream *
 garrow_file_input_stream_new(const gchar *path, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowFileInputStream *
 garrow_file_input_stream_new_file_descriptor(gint file_descriptor, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_file_input_stream_get_file_descriptor(GArrowFileInputStream *stream);

 #define GARROW_TYPE_MEMORY_MAPPED_INPUT_STREAM \
   (garrow_memory_mapped_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowMemoryMappedInputStream,
                          garrow_memory_mapped_input_stream,
                          GARROW,
@@ -135,10 +158,12 @@ struct _GArrowMemoryMappedInputStreamClass
   GArrowSeekableInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowMemoryMappedInputStream *
 garrow_memory_mapped_input_stream_new(const gchar *path, GError **error);

 #define GARROW_TYPE_GIO_INPUT_STREAM (garrow_gio_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowGIOInputStream,
                          garrow_gio_input_stream,
                          GARROW,
@@ -149,15 +174,19 @@ struct _GArrowGIOInputStreamClass
   GArrowSeekableInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowGIOInputStream *
 garrow_gio_input_stream_new(GInputStream *gio_input_stream);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 G_GNUC_DEPRECATED
 GInputStream *
 garrow_gio_input_stream_get_raw(GArrowGIOInputStream *input_stream);
 #endif

 #define GARROW_TYPE_COMPRESSED_INPUT_STREAM (garrow_compressed_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowCompressedInputStream,
                          garrow_compressed_input_stream,
                          GARROW,
@@ -168,6 +197,7 @@ struct _GArrowCompressedInputStreamClass
   GArrowInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowCompressedInputStream *
 garrow_compressed_input_stream_new(GArrowCodec *codec,
                                    GArrowInputStream *raw,
diff --git a/c_glib/arrow-glib/input-stream.hpp b/c_glib/arrow-glib/input-stream.hpp
index 7ae759370ddbd..0400398c4777f 100644
--- a/c_glib/arrow-glib/input-stream.hpp
+++ b/c_glib/arrow-glib/input-stream.hpp
@@ -26,34 +26,48 @@
 #include

+GARROW_EXTERN
 GArrowInputStream *
 garrow_input_stream_new_raw(std::shared_ptr<arrow::io::InputStream> *arrow_input_stream);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::InputStream>
 garrow_input_stream_get_raw(GArrowInputStream *input_stream);

+GARROW_EXTERN
 GArrowSeekableInputStream *
 garrow_seekable_input_stream_new_raw(
   std::shared_ptr<arrow::io::RandomAccessFile> *arrow_random_access_file);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::RandomAccessFile>
 garrow_seekable_input_stream_get_raw(GArrowSeekableInputStream *input_stream);

+GARROW_EXTERN
 GArrowBufferInputStream *
 garrow_buffer_input_stream_new_raw(
   std::shared_ptr<arrow::io::BufferReader> *arrow_buffer_reader, GArrowBuffer *buffer);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::BufferReader>
 garrow_buffer_input_stream_get_raw(GArrowBufferInputStream *input_stream);

+GARROW_EXTERN
 GArrowFileInputStream *
 garrow_file_input_stream_new_raw(std::shared_ptr<arrow::io::ReadableFile> *arrow_stream);

+GARROW_EXTERN
 GArrowMemoryMappedInputStream *
 garrow_memory_mapped_input_stream_new_raw(
   std::shared_ptr<arrow::io::MemoryMappedFile> *arrow_stream);

+GARROW_EXTERN
 GArrowCompressedInputStream *
 garrow_compressed_input_stream_new_raw(
   std::shared_ptr<arrow::io::CompressedInputStream> *arrow_raw,
   GArrowCodec *codec,
   GArrowInputStream *raw);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::CompressedInputStream>
 garrow_compressed_input_stream_get_raw(GArrowCompressedInputStream *stream);
diff --git a/c_glib/arrow-glib/interval.h b/c_glib/arrow-glib/interval.h
index a6c9e1ff1e1ef..8c23b9a509bb4 100644
--- a/c_glib/arrow-glib/interval.h
+++ b/c_glib/arrow-glib/interval.h
@@ -26,6 +26,7 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_DAY_MILLISECOND (garrow_day_millisecond_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowDayMillisecond, garrow_day_millisecond, GARROW, DAY_MILLISECOND, GObject)

@@ -47,6 +48,7 @@ garrow_day_millisecond_less_than(GArrowDayMillisecond *day_millisecond,
                                  GArrowDayMillisecond *other_day_millisecond);

 #define GARROW_TYPE_MONTH_DAY_NANO (garrow_month_day_nano_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowMonthDayNano, garrow_month_day_nano, GARROW, MONTH_DAY_NANO, GObject)
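Editor's note: on the consumer side, annotations like the ones added above let a project pin the arrow-glib API range it targets, GLib-style, so the compiler can flag calls outside that range. A hedged sketch follows, assuming the generated version.h honors GARROW_VERSION_MIN_REQUIRED/GARROW_VERSION_MAX_ALLOWED and exposes GARROW_VERSION_x_y encodings like GLib does; these macro names are an assumption, not taken from this PR.

/* Sketch only: pin the targeted arrow-glib API range before including
 * the headers (assumed macro names, following the GLib convention). */
#define GARROW_VERSION_MIN_REQUIRED GARROW_VERSION_8_0
#define GARROW_VERSION_MAX_ALLOWED GARROW_VERSION_8_0

#include <arrow-glib/arrow-glib.h>

int
main(void)
{
  /* Fine: garrow_filter_options_new() is marked GARROW_AVAILABLE_IN_0_17
   * in the diff above, well inside the pinned range. */
  GArrowFilterOptions *options = garrow_filter_options_new();
  g_object_unref(options);
  /* A call into 16.0-only API, e.g. garrow_strptime_options_new(), would
   * now be expected to trigger an "unavailable" compiler warning. */
  return 0;
}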
(garrow_read_options_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE( GArrowReadOptions, garrow_read_options, GARROW, READ_OPTIONS, GObject) struct _GArrowReadOptionsClass @@ -46,6 +47,7 @@ garrow_read_options_set_included_fields(GArrowReadOptions *options, gsize n_fields); #define GARROW_TYPE_WRITE_OPTIONS (garrow_write_options_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE( GArrowWriteOptions, garrow_write_options, GARROW, WRITE_OPTIONS, GObject) struct _GArrowWriteOptionsClass diff --git a/c_glib/arrow-glib/ipc-options.hpp b/c_glib/arrow-glib/ipc-options.hpp index f57fbd3c11e5a..838d05d41dbac 100644 --- a/c_glib/arrow-glib/ipc-options.hpp +++ b/c_glib/arrow-glib/ipc-options.hpp @@ -23,10 +23,14 @@ #include +GARROW_EXTERN arrow::ipc::IpcReadOptions * garrow_read_options_get_raw(GArrowReadOptions *options); + +GARROW_EXTERN arrow::ipc::DictionaryMemo * garrow_read_options_get_dictionary_memo_raw(GArrowReadOptions *options); +GARROW_EXTERN arrow::ipc::IpcWriteOptions * garrow_write_options_get_raw(GArrowWriteOptions *options); diff --git a/c_glib/arrow-glib/local-file-system.h b/c_glib/arrow-glib/local-file-system.h index 9af4f8e8b168d..6ad2ee9f231ab 100644 --- a/c_glib/arrow-glib/local-file-system.h +++ b/c_glib/arrow-glib/local-file-system.h @@ -27,6 +27,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_LOCAL_FILE_SYSTEM_OPTIONS \ (garrow_local_file_system_options_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowLocalFileSystemOptions, garrow_local_file_system_options, GARROW, @@ -44,6 +45,7 @@ garrow_local_file_system_options_new(void); /* arrow::fs::LocalFileSystem */ #define GARROW_TYPE_LOCAL_FILE_SYSTEM (garrow_local_file_system_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowLocalFileSystem, garrow_local_file_system, GARROW, diff --git a/c_glib/arrow-glib/memory-pool.h b/c_glib/arrow-glib/memory-pool.h index de2a5d717a183..7da15a9eb1b47 100644 --- a/c_glib/arrow-glib/memory-pool.h +++ b/c_glib/arrow-glib/memory-pool.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GARROW_TYPE_MEMORY_POOL (garrow_memory_pool_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowMemoryPool, garrow_memory_pool, GARROW, MEMORY_POOL, GObject) struct _GArrowMemoryPoolClass @@ -31,12 +34,19 @@ struct _GArrowMemoryPoolClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowMemoryPool * garrow_memory_pool_default(); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_memory_pool_get_bytes_allocated(GArrowMemoryPool *memory_pool); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_memory_pool_get_max_memory(GArrowMemoryPool *memory_pool); + +GARROW_AVAILABLE_IN_ALL gchar * garrow_memory_pool_get_backend_name(GArrowMemoryPool *memory_pool); diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index 67909ff22c428..fd32b35badcb1 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -205,14 +205,12 @@ cpp_internal_headers = files( 'internal-index.hpp', ) -version_h_conf = configuration_data() -version_h_conf.set('GARROW_VERSION_MAJOR', version_major) -version_h_conf.set('GARROW_VERSION_MINOR', version_minor) -version_h_conf.set('GARROW_VERSION_MICRO', version_micro) -version_h_conf.set('GARROW_VERSION_TAG', version_tag) -version_h = configure_file(input: 'version.h.in', - output: 'version.h', - configuration: version_h_conf) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GARROW', '--version', 
diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build
index 67909ff22c428..fd32b35badcb1 100644
--- a/c_glib/arrow-glib/meson.build
+++ b/c_glib/arrow-glib/meson.build
@@ -205,14 +205,12 @@ cpp_internal_headers = files(
   'internal-index.hpp',
 )
 
-version_h_conf = configuration_data()
-version_h_conf.set('GARROW_VERSION_MAJOR', version_major)
-version_h_conf.set('GARROW_VERSION_MINOR', version_minor)
-version_h_conf.set('GARROW_VERSION_MICRO', version_micro)
-version_h_conf.set('GARROW_VERSION_TAG', version_tag)
-version_h = configure_file(input: 'version.h.in',
-                           output: 'version.h',
-                           configuration: version_h_conf)
+version_h = configure_file(
+  input: 'version.h.in',
+  output: 'version.h',
+  command: [python3, generate_version_header_py, '--library', 'GARROW', '--version',
+            version, '--input', '@INPUT@', '--output', '@OUTPUT@'],
+)
+
 c_headers += version_h
 
 enums = gnome.mkenums('enums',
@@ -226,11 +224,9 @@ enums = gnome.mkenums('enums',
 enums_source = enums[0]
 enums_header = enums[1]
 
-
 headers = c_headers + cpp_headers
 install_headers(headers, subdir: meson.project_name())
 
-
 gobject = dependency('gobject-2.0')
 gobject_libdir = gobject.get_variable(pkgconfig: 'libdir')
 # This is for Homebrew. "pkg-config --cflags gio-2.0" includes the
@@ -253,6 +249,7 @@ libarrow_glib = library('arrow-glib',
                         dependencies: dependencies,
                         implicit_include_directories: false,
                         include_directories: base_include_directories,
+                        cpp_args: ['-DGARROW_COMPILATION'],
                         soversion: so_version,
                         version: library_version)
 arrow_glib = declare_dependency(link_with: libarrow_glib,
diff --git a/c_glib/arrow-glib/orc-file-reader.h b/c_glib/arrow-glib/orc-file-reader.h
index 20089eb2866c6..4eb3df5242e48 100644
--- a/c_glib/arrow-glib/orc-file-reader.h
+++ b/c_glib/arrow-glib/orc-file-reader.h
@@ -24,6 +24,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_ORC_FILE_READER (garrow_orc_file_reader_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowORCFileReader, garrow_orc_file_reader, GARROW, ORC_FILE_READER, GObject)
 struct _GArrowORCFileReaderClass
@@ -31,10 +32,12 @@ struct _GArrowORCFileReaderClass
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowORCFileReader *
 garrow_orc_file_reader_new(GArrowSeekableInputStream *file, GError **error);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_set_field_indices)
 void
 garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader,
@@ -47,6 +50,7 @@ garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader,
                                          const gint *field_indices,
                                          guint n_field_indices);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_get_field_indices)
 const gint *
 garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader,
@@ -56,14 +60,24 @@ GARROW_AVAILABLE_IN_0_12
 const gint *
 garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader,
                                          guint *n_field_indices);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_orc_file_reader_read_type(GArrowORCFileReader *reader, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTable *
 garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatch *
 garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader,
                                    gint64 i,
                                    GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader);
diff --git a/c_glib/arrow-glib/output-stream.cpp b/c_glib/arrow-glib/output-stream.cpp
index 83de2eb38a72a..d9bdf7ad8b786 100644
--- a/c_glib/arrow-glib/output-stream.cpp
+++ b/c_glib/arrow-glib/output-stream.cpp
@@ -33,6 +33,22 @@
 #include <arrow-glib/output-stream.hpp>
 #include <arrow-glib/tensor.hpp>
 
+static std::shared_ptr<arrow::io::FileInterface>
+garrow_output_stream_get_raw_file_interface(GArrowFile *file)
+{
+  auto output_stream = GARROW_OUTPUT_STREAM(file);
+  auto arrow_output_stream = garrow_output_stream_get_raw(output_stream);
+  return arrow_output_stream;
+}
+
+static std::shared_ptr<arrow::io::Writable>
+garrow_output_stream_get_raw_writable_interface(GArrowWritable *writable)
+{
+  auto output_stream = GARROW_OUTPUT_STREAM(writable);
+  auto arrow_output_stream = garrow_output_stream_get_raw(output_stream);
+  return arrow_output_stream;
+}
+
 G_BEGIN_DECLS
 
 /**
@@ -65,28 +81,12 @@ enum {
   PROP_OUTPUT_STREAM
 };
 
-static std::shared_ptr<arrow::io::FileInterface>
-garrow_output_stream_get_raw_file_interface(GArrowFile *file)
-{
-  auto output_stream = GARROW_OUTPUT_STREAM(file);
-  auto arrow_output_stream = garrow_output_stream_get_raw(output_stream);
-  return arrow_output_stream;
-}
-
 static void
 garrow_output_stream_file_interface_init(GArrowFileInterface *iface)
 {
   iface->get_raw = garrow_output_stream_get_raw_file_interface;
 }
 
-static std::shared_ptr<arrow::io::Writable>
-garrow_output_stream_get_raw_writable_interface(GArrowWritable *writable)
-{
-  auto output_stream = GARROW_OUTPUT_STREAM(writable);
-  auto arrow_output_stream = garrow_output_stream_get_raw(output_stream);
-  return arrow_output_stream;
-}
-
 static void
 garrow_output_stream_writable_interface_init(GArrowWritableInterface *iface)
 {
diff --git a/c_glib/arrow-glib/output-stream.h b/c_glib/arrow-glib/output-stream.h
index 1b18c08c14a5f..5c8b4b9374fc6 100644
--- a/c_glib/arrow-glib/output-stream.h
+++ b/c_glib/arrow-glib/output-stream.h
@@ -30,6 +30,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_OUTPUT_STREAM (garrow_output_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowOutputStream, garrow_output_stream, GARROW, OUTPUT_STREAM, GObject)
 struct _GArrowOutputStreamClass
@@ -37,8 +38,11 @@ struct _GArrowOutputStreamClass
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_output_stream_align(GArrowOutputStream *stream, gint32 alignment, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_output_stream_write_tensor(GArrowOutputStream *stream,
                                   GArrowTensor *tensor,
@@ -51,6 +55,7 @@ garrow_output_stream_write_record_batch(GArrowOutputStream *stream,
                                         GError **error);
 
 #define GARROW_TYPE_FILE_OUTPUT_STREAM (garrow_file_output_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowFileOutputStream,
                          garrow_file_output_stream,
                          GARROW,
@@ -61,10 +66,12 @@ struct _GArrowFileOutputStreamClass
   GArrowOutputStreamClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowFileOutputStream *
 garrow_file_output_stream_new(const gchar *path, gboolean append, GError **error);
 
 #define GARROW_TYPE_BUFFER_OUTPUT_STREAM (garrow_buffer_output_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowBufferOutputStream,
                          garrow_buffer_output_stream,
                          GARROW,
@@ -75,10 +82,12 @@ struct _GArrowBufferOutputStreamClass
   GArrowOutputStreamClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowBufferOutputStream *
 garrow_buffer_output_stream_new(GArrowResizableBuffer *buffer);
 
 #define GARROW_TYPE_GIO_OUTPUT_STREAM (garrow_gio_output_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowGIOOutputStream,
                          garrow_gio_output_stream,
                          GARROW,
@@ -89,15 +98,19 @@ struct _GArrowGIOOutputStreamClass
   GArrowOutputStreamClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowGIOOutputStream *
 garrow_gio_output_stream_new(GOutputStream *gio_output_stream);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 G_GNUC_DEPRECATED
 GOutputStream *
 garrow_gio_output_stream_get_raw(GArrowGIOOutputStream *output_stream);
 #endif
 
 #define GARROW_TYPE_COMPRESSED_OUTPUT_STREAM (garrow_compressed_output_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowCompressedOutputStream,
                          garrow_compressed_output_stream,
                          GARROW,
@@ -108,6 +121,7 @@ struct _GArrowCompressedOutputStreamClass
   GArrowOutputStreamClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowCompressedOutputStream *
 garrow_compressed_output_stream_new(GArrowCodec *codec,
                                     GArrowOutputStream *raw,
diff --git a/c_glib/arrow-glib/output-stream.hpp b/c_glib/arrow-glib/output-stream.hpp
index e41c65da88d82..515d969efc245 100644
--- a/c_glib/arrow-glib/output-stream.hpp
+++ b/c_glib/arrow-glib/output-stream.hpp
@@ -25,23 +25,32 @@
 #include <arrow-glib/output-stream.h>
 
+GARROW_EXTERN
 GArrowOutputStream *
 garrow_output_stream_new_raw(
   std::shared_ptr<arrow::io::OutputStream> *arrow_output_stream);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::OutputStream>
 garrow_output_stream_get_raw(GArrowOutputStream *output_stream);
 
+GARROW_EXTERN
 GArrowFileOutputStream *
 garrow_file_output_stream_new_raw(
   std::shared_ptr<arrow::io::FileOutputStream> *arrow_file_output_stream);
+
+GARROW_EXTERN
 GArrowBufferOutputStream *
 garrow_buffer_output_stream_new_raw(
   std::shared_ptr<arrow::io::BufferOutputStream> *arrow_buffer_output_stream);
 
+GARROW_EXTERN
 GArrowCompressedOutputStream *
 garrow_compressed_output_stream_new_raw(
   std::shared_ptr<arrow::io::CompressedOutputStream> *arrow_raw,
   GArrowCodec *codec,
   GArrowOutputStream *raw);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::CompressedOutputStream>
 garrow_compressed_output_stream_get_raw(GArrowCompressedOutputStream *stream);
diff --git a/c_glib/arrow-glib/readable.h b/c_glib/arrow-glib/readable.h
index d0b1f5b6a99ee..266b45849057e 100644
--- a/c_glib/arrow-glib/readable.h
+++ b/c_glib/arrow-glib/readable.h
@@ -25,10 +25,13 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_READABLE (garrow_readable_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_INTERFACE(GArrowReadable, garrow_readable, GARROW, READABLE, GObject)
 
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_readable_read(GArrowReadable *readable, gint64 n_bytes, GError **error);
+
 GARROW_AVAILABLE_IN_0_17
 GBytes *
 garrow_readable_read_bytes(GArrowReadable *readable, gint64 n_bytes, GError **error);
diff --git a/c_glib/arrow-glib/reader.h b/c_glib/arrow-glib/reader.h
index 96e4c5bbb5890..5401aa3bb1fc5 100644
--- a/c_glib/arrow-glib/reader.h
+++ b/c_glib/arrow-glib/reader.h
@@ -29,6 +29,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_RECORD_BATCH_READER (garrow_record_batch_reader_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchReader,
                          garrow_record_batch_reader,
                          GARROW,
@@ -53,22 +54,29 @@ GARROW_AVAILABLE_IN_6_0
 gpointer
 garrow_record_batch_reader_export(GArrowRecordBatchReader *reader, GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_record_batch_reader_get_schema(GArrowRecordBatchReader *reader);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 G_GNUC_DEPRECATED_FOR(garrow_record_batch_reader_read_next)
 GArrowRecordBatch *
 garrow_record_batch_reader_get_next_record_batch(GArrowRecordBatchReader *reader,
                                                  GError **error);
 #endif
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 G_GNUC_DEPRECATED_FOR(garrow_record_batch_reader_read_next)
 GArrowRecordBatch *
 garrow_record_batch_reader_read_next_record_batch(GArrowRecordBatchReader *reader,
                                                   GError **error);
 #endif
+
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatch *
 garrow_record_batch_reader_read_next(GArrowRecordBatchReader *reader, GError **error);
+
 GARROW_AVAILABLE_IN_6_0
 GArrowTable *
 garrow_record_batch_reader_read_all(GArrowRecordBatchReader *reader, GError **error);
@@ -78,6 +86,7 @@ GList *
 garrow_record_batch_reader_get_sources(GArrowRecordBatchReader *reader);
 
 #define GARROW_TYPE_TABLE_BATCH_READER (garrow_table_batch_reader_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTableBatchReader,
                          garrow_table_batch_reader,
                          GARROW,
@@ -88,6 +97,7 @@ struct _GArrowTableBatchReaderClass
   GArrowRecordBatchReaderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTableBatchReader *
 garrow_table_batch_reader_new(GArrowTable *table);
 
@@ -98,6 +108,7 @@ garrow_table_batch_reader_set_max_chunk_size(GArrowTableBatchReader *reader,
 
 #define GARROW_TYPE_RECORD_BATCH_STREAM_READER \
   (garrow_record_batch_stream_reader_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchStreamReader,
                          garrow_record_batch_stream_reader,
                          GARROW,
@@ -108,10 +119,12 @@ struct _GArrowRecordBatchStreamReaderClass
   GArrowRecordBatchReaderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatchStreamReader *
 garrow_record_batch_stream_reader_new(GArrowInputStream *stream, GError **error);
 
 #define GARROW_TYPE_RECORD_BATCH_FILE_READER (garrow_record_batch_file_reader_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchFileReader,
                          garrow_record_batch_file_reader,
                          GARROW,
@@ -122,28 +135,39 @@ struct _GArrowRecordBatchFileReaderClass
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatchFileReader *
 garrow_record_batch_file_reader_new(GArrowSeekableInputStream *file, GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_record_batch_file_reader_get_schema(GArrowRecordBatchFileReader *reader);
+
+GARROW_AVAILABLE_IN_ALL
 guint
 garrow_record_batch_file_reader_get_n_record_batches(GArrowRecordBatchFileReader *reader);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowMetadataVersion
 garrow_record_batch_file_reader_get_version(GArrowRecordBatchFileReader *reader);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 G_GNUC_DEPRECATED_FOR(garrow_record_batch_file_reader_read_record_batch)
 GArrowRecordBatch *
 garrow_record_batch_file_reader_get_record_batch(GArrowRecordBatchFileReader *reader,
                                                  guint i,
                                                  GError **error);
 #endif
+
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatch *
 garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *reader,
                                                   guint i,
                                                   GError **error);
 
 #define GARROW_TYPE_FEATHER_FILE_READER (garrow_feather_file_reader_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowFeatherFileReader,
                          garrow_feather_file_reader,
                          GARROW,
@@ -154,18 +178,26 @@ struct _GArrowFeatherFileReaderClass
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowFeatherFileReader *
 garrow_feather_file_reader_new(GArrowSeekableInputStream *file, GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_feather_file_reader_get_version(GArrowFeatherFileReader *reader);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTable *
 garrow_feather_file_reader_read(GArrowFeatherFileReader *reader, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTable *
 garrow_feather_file_reader_read_indices(GArrowFeatherFileReader *reader,
                                         const gint *indices,
                                         guint n_indices,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTable *
 garrow_feather_file_reader_read_names(GArrowFeatherFileReader *reader,
                                       const gchar **names,
@@ -173,6 +205,7 @@ garrow_feather_file_reader_read_names(GArrowFeatherFileReader *reader,
                                       GError **error);
 
 #define GARROW_TYPE_CSV_READ_OPTIONS (garrow_csv_read_options_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowCSVReadOptions, garrow_csv_read_options, GARROW, CSV_READ_OPTIONS, GObject)
 struct _GArrowCSVReadOptionsClass
@@ -180,16 +213,23 @@ struct _GArrowCSVReadOptionsClass
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowCSVReadOptions *
 garrow_csv_read_options_new(void);
+
+GARROW_AVAILABLE_IN_ALL
 void
 garrow_csv_read_options_add_column_type(GArrowCSVReadOptions *options,
                                         const gchar *name,
                                         GArrowDataType *data_type);
 
+GARROW_AVAILABLE_IN_ALL
 void
 garrow_csv_read_options_add_schema(GArrowCSVReadOptions *options, GArrowSchema *schema);
+
+GARROW_AVAILABLE_IN_ALL
 GHashTable *
 garrow_csv_read_options_get_column_types(GArrowCSVReadOptions *options);
+
 GARROW_AVAILABLE_IN_0_14
 void
 garrow_csv_read_options_set_null_values(GArrowCSVReadOptions *options,
@@ -251,16 +291,20 @@ garrow_csv_read_options_add_timestamp_parser(GArrowCSVReadOptions *options,
                                              GArrowTimestampParser *parser);
 
 #define GARROW_TYPE_CSV_READER (garrow_csv_reader_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowCSVReader, garrow_csv_reader, GARROW, CSV_READER, GObject)
 struct _GArrowCSVReaderClass
 {
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowCSVReader *
 garrow_csv_reader_new(GArrowInputStream *input,
                       GArrowCSVReadOptions *options,
                       GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTable *
 garrow_csv_reader_read(GArrowCSVReader *reader, GError **error);
 
@@ -279,6 +323,7 @@ typedef enum {
 } GArrowJSONReadUnexpectedFieldBehavior;
 
 #define GARROW_TYPE_JSON_READ_OPTIONS (garrow_json_read_options_get_type())
+GARROW_AVAILABLE_IN_0_14
 G_DECLARE_DERIVABLE_TYPE(
   GArrowJSONReadOptions, garrow_json_read_options, GARROW, JSON_READ_OPTIONS, GObject)
 struct _GArrowJSONReadOptionsClass
@@ -291,6 +336,7 @@ GArrowJSONReadOptions *
 garrow_json_read_options_new(void);
 
 #define GARROW_TYPE_JSON_READER (garrow_json_reader_get_type())
+GARROW_AVAILABLE_IN_0_14
 G_DECLARE_DERIVABLE_TYPE(
   GArrowJSONReader, garrow_json_reader, GARROW, JSON_READER, GObject)
 struct _GArrowJSONReaderClass
diff --git a/c_glib/arrow-glib/reader.hpp b/c_glib/arrow-glib/reader.hpp
index 192497ef52e31..beec6766af2e6 100644
--- a/c_glib/arrow-glib/reader.hpp
+++ b/c_glib/arrow-glib/reader.hpp
@@ -27,42 +27,61 @@
 #include <arrow-glib/reader.h>
 
+GARROW_EXTERN
 GArrowRecordBatchReader *
 garrow_record_batch_reader_new_raw(
   std::shared_ptr<arrow::RecordBatchReader> *arrow_reader, GList *sources);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::RecordBatchReader>
 garrow_record_batch_reader_get_raw(GArrowRecordBatchReader *reader);
 
+GARROW_EXTERN
 GArrowTableBatchReader *
 garrow_table_batch_reader_new_raw(std::shared_ptr<arrow::TableBatchReader> *arrow_reader,
                                   GArrowTable *table);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::TableBatchReader>
 garrow_table_batch_reader_get_raw(GArrowTableBatchReader *reader);
 
+GARROW_EXTERN
 GArrowRecordBatchStreamReader *
 garrow_record_batch_stream_reader_new_raw(
   std::shared_ptr<arrow::ipc::RecordBatchStreamReader> *arrow_reader);
 
+GARROW_EXTERN
 GArrowRecordBatchFileReader *
 garrow_record_batch_file_reader_new_raw(
   std::shared_ptr<arrow::ipc::RecordBatchFileReader> *arrow_reader);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::ipc::RecordBatchFileReader>
 garrow_record_batch_file_reader_get_raw(GArrowRecordBatchFileReader *reader);
 
+GARROW_EXTERN
 GArrowFeatherFileReader *
 garrow_feather_file_reader_new_raw(
   std::shared_ptr<arrow::ipc::feather::Reader> *arrow_reader);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::ipc::feather::Reader>
 garrow_feather_file_reader_get_raw(GArrowFeatherFileReader *reader);
 
+GARROW_EXTERN
 GArrowCSVReader *
 garrow_csv_reader_new_raw(std::shared_ptr<arrow::csv::TableReader> *arrow_reader,
                           GArrowInputStream *input);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::csv::TableReader>
 garrow_csv_reader_get_raw(GArrowCSVReader *reader);
 
+GARROW_EXTERN
 GArrowJSONReader *
 garrow_json_reader_new_raw(std::shared_ptr<arrow::json::TableReader> *arrow_reader,
                            GArrowInputStream *input);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::json::TableReader>
 garrow_json_reader_get_raw(GArrowJSONReader *reader);
diff --git a/c_glib/arrow-glib/record-batch.h b/c_glib/arrow-glib/record-batch.h
index 3c995658224cb..e7ffd83795ed4 100644
--- a/c_glib/arrow-glib/record-batch.h
+++ b/c_glib/arrow-glib/record-batch.h
@@ -26,6 +26,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_RECORD_BATCH (garrow_record_batch_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowRecordBatch, garrow_record_batch, GARROW, RECORD_BATCH, GObject)
 struct _GArrowRecordBatchClass
@@ -37,6 +38,7 @@ GARROW_AVAILABLE_IN_6_0
 GArrowRecordBatch *
 garrow_record_batch_import(gpointer c_abi_array, GArrowSchema *schema, GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatch *
 garrow_record_batch_new(GArrowSchema *schema,
                         guint32 n_rows,
@@ -50,6 +52,7 @@ garrow_record_batch_export(GArrowRecordBatch *record_batch,
                            gpointer *c_abi_schema,
                            GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_record_batch_equal(GArrowRecordBatch *record_batch,
                           GArrowRecordBatch *other_record_batch);
@@ -59,28 +62,43 @@ garrow_record_batch_equal_metadata(GArrowRecordBatch *record_batch,
                                    GArrowRecordBatch *other_record_batch,
                                    gboolean check_metadata);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_record_batch_get_schema(GArrowRecordBatch *record_batch);
+
 GARROW_AVAILABLE_IN_0_15
 GArrowArray *
 garrow_record_batch_get_column_data(GArrowRecordBatch *record_batch, gint i);
+
+GARROW_AVAILABLE_IN_ALL
 const gchar *
 garrow_record_batch_get_column_name(GArrowRecordBatch *record_batch, gint i);
+
+GARROW_AVAILABLE_IN_ALL
 guint
 garrow_record_batch_get_n_columns(GArrowRecordBatch *record_batch);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_record_batch_get_n_rows(GArrowRecordBatch *record_batch);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatch *
 garrow_record_batch_slice(GArrowRecordBatch *record_batch, gint64 offset, gint64 length);
 
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_record_batch_to_string(GArrowRecordBatch *record_batch, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatch *
 garrow_record_batch_add_column(GArrowRecordBatch *record_batch,
                                guint i,
                                GArrowField *field,
                                GArrowArray *column,
                                GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatch *
 garrow_record_batch_remove_column(GArrowRecordBatch *record_batch,
                                   guint i,
@@ -92,6 +110,7 @@ garrow_record_batch_serialize(GArrowRecordBatch *record_batch,
                               GError **error);
 
 #define GARROW_TYPE_RECORD_BATCH_ITERATOR (garrow_record_batch_iterator_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchIterator,
                          garrow_record_batch_iterator,
                          GARROW,
diff --git a/c_glib/arrow-glib/record-batch.hpp b/c_glib/arrow-glib/record-batch.hpp
index 4c3e5e8a78231..75c0432b390ba 100644
--- a/c_glib/arrow-glib/record-batch.hpp
+++ b/c_glib/arrow-glib/record-batch.hpp
@@ -23,13 +23,18 @@
 #include <arrow-glib/record-batch.h>
 
+GARROW_EXTERN
 GArrowRecordBatch *
 garrow_record_batch_new_raw(std::shared_ptr<arrow::RecordBatch> *arrow_record_batch);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::RecordBatch>
 garrow_record_batch_get_raw(GArrowRecordBatch *record_batch);
 
+GARROW_EXTERN
 GArrowRecordBatchIterator *
 garrow_record_batch_iterator_new_raw(arrow::RecordBatchIterator *arrow_iterator);
+
+GARROW_EXTERN
 arrow::RecordBatchIterator *
 garrow_record_batch_iterator_get_raw(GArrowRecordBatchIterator *iterator);
diff --git a/c_glib/arrow-glib/scalar.h b/c_glib/arrow-glib/scalar.h
index b4a6229c62fd1..5f9015d29c61c 100644
--- a/c_glib/arrow-glib/scalar.h
+++ b/c_glib/arrow-glib/scalar.h
@@ -25,6 +25,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_SCALAR (garrow_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowScalar, garrow_scalar, GARROW, SCALAR, GObject)
 struct _GArrowScalarClass
 {
@@ -64,6 +65,7 @@ garrow_scalar_cast(GArrowScalar *scalar, GError **error);
 
 #define GARROW_TYPE_NULL_SCALAR (garrow_null_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowNullScalar, garrow_null_scalar, GARROW, NULL_SCALAR, GArrowScalar)
 struct _GArrowNullScalarClass
@@ -76,6 +78,7 @@ GArrowNullScalar *
 garrow_null_scalar_new(void);
 
 #define GARROW_TYPE_BOOLEAN_SCALAR (garrow_boolean_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowBooleanScalar, garrow_boolean_scalar, GARROW, BOOLEAN_SCALAR, GArrowScalar)
 struct _GArrowBooleanScalarClass
@@ -91,6 +94,7 @@ gboolean
 garrow_boolean_scalar_get_value(GArrowBooleanScalar *scalar);
 
 #define GARROW_TYPE_INT8_SCALAR (garrow_int8_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowInt8Scalar, garrow_int8_scalar, GARROW, INT8_SCALAR, GArrowScalar)
 struct _GArrowInt8ScalarClass
@@ -106,6 +110,7 @@ gint8
 garrow_int8_scalar_get_value(GArrowInt8Scalar *scalar);
 
 #define GARROW_TYPE_INT16_SCALAR (garrow_int16_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowInt16Scalar, garrow_int16_scalar, GARROW, INT16_SCALAR, GArrowScalar)
 struct _GArrowInt16ScalarClass
@@ -121,6 +126,7 @@ gint16
 garrow_int16_scalar_get_value(GArrowInt16Scalar *scalar);
 
 #define GARROW_TYPE_INT32_SCALAR (garrow_int32_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowInt32Scalar, garrow_int32_scalar, GARROW, INT32_SCALAR, GArrowScalar)
 struct _GArrowInt32ScalarClass
@@ -136,6 +142,7 @@ gint32
 garrow_int32_scalar_get_value(GArrowInt32Scalar *scalar);
 
 #define GARROW_TYPE_INT64_SCALAR (garrow_int64_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowInt64Scalar, garrow_int64_scalar, GARROW, INT64_SCALAR, GArrowScalar)
 struct _GArrowInt64ScalarClass
@@ -151,6 +158,7 @@ gint64
 garrow_int64_scalar_get_value(GArrowInt64Scalar *scalar);
 
 #define GARROW_TYPE_UINT8_SCALAR (garrow_uint8_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUInt8Scalar, garrow_uint8_scalar, GARROW, UINT8_SCALAR, GArrowScalar)
 struct _GArrowUInt8ScalarClass
@@ -166,6 +174,7 @@ guint8
 garrow_uint8_scalar_get_value(GArrowUInt8Scalar *scalar);
 
 #define GARROW_TYPE_UINT16_SCALAR (garrow_uint16_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUInt16Scalar, garrow_uint16_scalar, GARROW, UINT16_SCALAR, GArrowScalar)
 struct _GArrowUInt16ScalarClass
@@ -181,6 +190,7 @@ guint16
 garrow_uint16_scalar_get_value(GArrowUInt16Scalar *scalar);
 
 #define GARROW_TYPE_UINT32_SCALAR (garrow_uint32_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUInt32Scalar, garrow_uint32_scalar, GARROW, UINT32_SCALAR, GArrowScalar)
 struct _GArrowUInt32ScalarClass
@@ -196,6 +206,7 @@ guint32
 garrow_uint32_scalar_get_value(GArrowUInt32Scalar *scalar);
 
 #define GARROW_TYPE_UINT64_SCALAR (garrow_uint64_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUInt64Scalar, garrow_uint64_scalar, GARROW, UINT64_SCALAR, GArrowScalar)
 struct _GArrowUInt64ScalarClass
@@ -211,6 +222,7 @@ guint64
 garrow_uint64_scalar_get_value(GArrowUInt64Scalar *scalar);
 
 #define GARROW_TYPE_HALF_FLOAT_SCALAR (garrow_half_float_scalar_get_type())
+GARROW_AVAILABLE_IN_11_0
 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatScalar,
                          garrow_half_float_scalar,
                          GARROW,
@@ -229,6 +241,7 @@ guint16
 garrow_half_float_scalar_get_value(GArrowHalfFloatScalar *scalar);
 
 #define GARROW_TYPE_FLOAT_SCALAR (garrow_float_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowFloatScalar, garrow_float_scalar, GARROW, FLOAT_SCALAR, GArrowScalar)
 struct _GArrowFloatScalarClass
@@ -244,6 +257,7 @@ gfloat
 garrow_float_scalar_get_value(GArrowFloatScalar *scalar);
 
 #define GARROW_TYPE_DOUBLE_SCALAR (garrow_double_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowDoubleScalar, garrow_double_scalar, GARROW, DOUBLE_SCALAR, GArrowScalar)
 struct _GArrowDoubleScalarClass
@@ -259,6 +273,7 @@ gdouble
 garrow_double_scalar_get_value(GArrowDoubleScalar *scalar);
 
 #define GARROW_TYPE_BASE_BINARY_SCALAR (garrow_base_binary_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowBaseBinaryScalar,
                          garrow_base_binary_scalar,
                          GARROW,
@@ -274,6 +289,7 @@ GArrowBuffer *
 garrow_base_binary_scalar_get_value(GArrowBaseBinaryScalar *scalar);
 
 #define GARROW_TYPE_BINARY_SCALAR (garrow_binary_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowBinaryScalar, garrow_binary_scalar, GARROW, BINARY_SCALAR, GArrowBaseBinaryScalar)
 struct _GArrowBinaryScalarClass
@@ -286,6 +302,7 @@ GArrowBinaryScalar *
 garrow_binary_scalar_new(GArrowBuffer *value);
 
 #define GARROW_TYPE_STRING_SCALAR (garrow_string_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowStringScalar, garrow_string_scalar, GARROW, STRING_SCALAR, GArrowBaseBinaryScalar)
 struct _GArrowStringScalarClass
@@ -298,6 +315,7 @@ GArrowStringScalar *
 garrow_string_scalar_new(GArrowBuffer *value);
 
 #define GARROW_TYPE_LARGE_BINARY_SCALAR (garrow_large_binary_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryScalar,
                          garrow_large_binary_scalar,
                          GARROW,
@@ -313,6 +331,7 @@ GArrowLargeBinaryScalar *
 garrow_large_binary_scalar_new(GArrowBuffer *value);
 
 #define GARROW_TYPE_LARGE_STRING_SCALAR (garrow_large_string_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringScalar,
                          garrow_large_string_scalar,
                          GARROW,
@@ -328,6 +347,7 @@ GArrowLargeStringScalar *
 garrow_large_string_scalar_new(GArrowBuffer *value);
 
 #define GARROW_TYPE_FIXED_SIZE_BINARY_SCALAR (garrow_fixed_size_binary_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryScalar,
                          garrow_fixed_size_binary_scalar,
                          GARROW,
@@ -344,6 +364,7 @@ garrow_fixed_size_binary_scalar_new(GArrowFixedSizeBinaryDataType *data_type,
                                     GArrowBuffer *value);
 
 #define GARROW_TYPE_DATE32_SCALAR (garrow_date32_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowDate32Scalar, garrow_date32_scalar, GARROW, DATE32_SCALAR, GArrowScalar)
 struct _GArrowDate32ScalarClass
@@ -359,6 +380,7 @@ gint32
 garrow_date32_scalar_get_value(GArrowDate32Scalar *scalar);
 
 #define GARROW_TYPE_DATE64_SCALAR (garrow_date64_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowDate64Scalar, garrow_date64_scalar, GARROW, DATE64_SCALAR, GArrowScalar)
 struct _GArrowDate64ScalarClass
@@ -374,6 +396,7 @@ gint64
 garrow_date64_scalar_get_value(GArrowDate64Scalar *scalar);
 
 #define GARROW_TYPE_TIME32_SCALAR (garrow_time32_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowTime32Scalar, garrow_time32_scalar, GARROW, TIME32_SCALAR, GArrowScalar)
 struct _GArrowTime32ScalarClass
@@ -389,6 +412,7 @@ gint32
 garrow_time32_scalar_get_value(GArrowTime32Scalar *scalar);
 
 #define GARROW_TYPE_TIME64_SCALAR (garrow_time64_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowTime64Scalar, garrow_time64_scalar, GARROW, TIME64_SCALAR, GArrowScalar)
 struct _GArrowTime64ScalarClass
@@ -404,6 +428,7 @@ gint64
 garrow_time64_scalar_get_value(GArrowTime64Scalar *scalar);
 
 #define GARROW_TYPE_TIMESTAMP_SCALAR (garrow_timestamp_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowTimestampScalar, garrow_timestamp_scalar, GARROW, TIMESTAMP_SCALAR, GArrowScalar)
 struct _GArrowTimestampScalarClass
@@ -419,6 +444,7 @@ gint64
 garrow_timestamp_scalar_get_value(GArrowTimestampScalar *scalar);
 
 #define GARROW_TYPE_MONTH_INTERVAL_SCALAR (garrow_month_interval_scalar_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalScalar,
                          garrow_month_interval_scalar,
                          GARROW,
@@ -437,6 +463,7 @@ gint32
 garrow_month_interval_scalar_get_value(GArrowMonthIntervalScalar *scalar);
 
 #define GARROW_TYPE_DAY_TIME_INTERVAL_SCALAR (garrow_day_time_interval_scalar_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalScalar,
                          garrow_day_time_interval_scalar,
                          GARROW,
@@ -456,6 +483,7 @@ garrow_day_time_interval_scalar_get_value(GArrowDayTimeIntervalScalar *scalar);
 
 #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_SCALAR \
   (garrow_month_day_nano_interval_scalar_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalScalar,
                          garrow_month_day_nano_interval_scalar,
                          GARROW,
@@ -474,6 +502,7 @@ GArrowMonthDayNano *
 garrow_month_day_nano_interval_scalar_get_value(GArrowMonthDayNanoIntervalScalar *scalar);
 
 #define GARROW_TYPE_DECIMAL128_SCALAR (garrow_decimal128_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Scalar,
                          garrow_decimal128_scalar,
                          GARROW,
@@ -493,6 +522,7 @@ GArrowDecimal128 *
 garrow_decimal128_scalar_get_value(GArrowDecimal128Scalar *scalar);
 
 #define GARROW_TYPE_DECIMAL256_SCALAR (garrow_decimal256_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256Scalar,
                          garrow_decimal256_scalar,
                          GARROW,
@@ -512,6 +542,7 @@ GArrowDecimal256 *
 garrow_decimal256_scalar_get_value(GArrowDecimal256Scalar *scalar);
 
 #define GARROW_TYPE_BASE_LIST_SCALAR (garrow_base_list_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowBaseListScalar, garrow_base_list_scalar, GARROW, BASE_LIST_SCALAR, GArrowScalar)
 struct _GArrowBaseListScalarClass
@@ -524,6 +555,7 @@ GArrowArray *
 garrow_base_list_scalar_get_value(GArrowBaseListScalar *scalar);
 
 #define GARROW_TYPE_LIST_SCALAR (garrow_list_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowListScalar, garrow_list_scalar, GARROW, LIST_SCALAR, GArrowBaseListScalar)
 struct _GArrowListScalarClass
@@ -536,6 +568,7 @@ GArrowListScalar *
 garrow_list_scalar_new(GArrowListArray *value);
 
 #define GARROW_TYPE_LARGE_LIST_SCALAR (garrow_large_list_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeListScalar,
                          garrow_large_list_scalar,
                          GARROW,
@@ -551,6 +584,7 @@ GArrowLargeListScalar *
 garrow_large_list_scalar_new(GArrowLargeListArray *value);
 
 #define GARROW_TYPE_MAP_SCALAR (garrow_map_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowMapScalar, garrow_map_scalar, GARROW, MAP_SCALAR, GArrowBaseListScalar)
 struct _GArrowMapScalarClass
@@ -563,6 +597,7 @@ GArrowMapScalar *
 garrow_map_scalar_new(GArrowStructArray *value);
 
 #define GARROW_TYPE_STRUCT_SCALAR (garrow_struct_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowStructScalar, garrow_struct_scalar, GARROW, STRUCT_SCALAR, GArrowScalar)
 struct _GArrowStructScalarClass
@@ -578,6 +613,7 @@ GList *
 garrow_struct_scalar_get_value(GArrowStructScalar *scalar);
 
 #define GARROW_TYPE_UNION_SCALAR (garrow_union_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUnionScalar, garrow_union_scalar, GARROW, UNION_SCALAR, GArrowScalar)
 struct _GArrowUnionScalarClass
@@ -593,6 +629,7 @@ GArrowScalar *
 garrow_union_scalar_get_value(GArrowUnionScalar *scalar);
 
 #define GARROW_TYPE_SPARSE_UNION_SCALAR (garrow_sparse_union_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionScalar,
                          garrow_sparse_union_scalar,
                          GARROW,
@@ -610,6 +647,7 @@ garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type,
                                GArrowScalar *value);
 
 #define GARROW_TYPE_DENSE_UNION_SCALAR (garrow_dense_union_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionScalar,
                          garrow_dense_union_scalar,
                          GARROW,
@@ -627,6 +665,7 @@ garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type,
                               GArrowScalar *value);
 
 #define GARROW_TYPE_EXTENSION_SCALAR (garrow_extension_scalar_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowExtensionScalar, garrow_extension_scalar, GARROW, EXTENSION_SCALAR, GArrowScalar)
 struct _GArrowExtensionScalarClass
diff --git a/c_glib/arrow-glib/schema.h b/c_glib/arrow-glib/schema.h
index 93cd5bd542cf8..aab740397b7d6 100644
--- a/c_glib/arrow-glib/schema.h
+++ b/c_glib/arrow-glib/schema.h
@@ -24,6 +24,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_SCHEMA (garrow_schema_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowSchema, garrow_schema, GARROW, SCHEMA, GObject)
 struct _GArrowSchemaClass
 {
@@ -34,6 +35,7 @@ GARROW_AVAILABLE_IN_6_0
 GArrowSchema *
 garrow_schema_import(gpointer c_abi_schema, GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_schema_new(GList *fields);
 
@@ -41,34 +43,48 @@ GARROW_AVAILABLE_IN_6_0
 gpointer
 garrow_schema_export(GArrowSchema *schema, GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_schema_equal(GArrowSchema *schema, GArrowSchema *other_schema);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_schema_get_field(GArrowSchema *schema, guint i);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_schema_get_field_by_name(GArrowSchema *schema, const gchar *name);
 
 GARROW_AVAILABLE_IN_0_15
 gint
 garrow_schema_get_field_index(GArrowSchema *schema, const gchar *name);
 
+GARROW_AVAILABLE_IN_ALL
 guint
 garrow_schema_n_fields(GArrowSchema *schema);
+
+GARROW_AVAILABLE_IN_ALL
 GList *
 garrow_schema_get_fields(GArrowSchema *schema);
 
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_schema_to_string(GArrowSchema *schema);
+
 GARROW_AVAILABLE_IN_0_17
 gchar *
 garrow_schema_to_string_metadata(GArrowSchema *schema, gboolean show_metadata);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_schema_add_field(GArrowSchema *schema, guint i, GArrowField *field, GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_schema_remove_field(GArrowSchema *schema, guint i, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_schema_replace_field(GArrowSchema *schema,
                             guint i,
diff --git a/c_glib/arrow-glib/schema.hpp b/c_glib/arrow-glib/schema.hpp
index 333f73391c900..ba6c459495461 100644
--- a/c_glib/arrow-glib/schema.hpp
+++ b/c_glib/arrow-glib/schema.hpp
@@ -23,7 +23,10 @@
 #include <arrow-glib/schema.h>
 
+GARROW_EXTERN
 GArrowSchema *
 garrow_schema_new_raw(std::shared_ptr<arrow::Schema> *arrow_schema);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::Schema>
 garrow_schema_get_raw(GArrowSchema *schema);
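These paired *_new_raw()/*_get_raw() helpers in the .hpp headers are the bridge between the GObject wrappers and the underlying Arrow C++ std::shared_ptr objects; exporting them with GARROW_EXTERN is what keeps the dependent *-glib libraries and out-of-tree bindings linking once symbols are no longer exported by default. A sketch of how dependent C++ code typically crosses that boundary (example_strip_metadata is a hypothetical helper; Schema::RemoveMetadata is regular Arrow C++ API):

    // Hypothetical helper in C++ code that links against arrow-glib.
    GArrowSchema *
    example_strip_metadata(GArrowSchema *schema)
    {
      // GLib wrapper -> Arrow C++ shared_ptr.
      auto arrow_schema = garrow_schema_get_raw(schema);
      // Operate with the plain Arrow C++ API.
      auto arrow_stripped = arrow_schema->RemoveMetadata();
      // Arrow C++ shared_ptr -> new GLib wrapper.
      return garrow_schema_new_raw(&arrow_stripped);
    }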
diff --git a/c_glib/arrow-glib/table-builder.h b/c_glib/arrow-glib/table-builder.h
index 0e13352bbdde3..6fad1ae79a40f 100644
--- a/c_glib/arrow-glib/table-builder.h
+++ b/c_glib/arrow-glib/table-builder.h
@@ -26,6 +26,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_RECORD_BATCH_BUILDER (garrow_record_batch_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchBuilder,
                          garrow_record_batch_builder,
                          GARROW,
@@ -36,34 +37,45 @@ struct _GArrowRecordBatchBuilderClass
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatchBuilder *
 garrow_record_batch_builder_new(GArrowSchema *schema, GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_record_batch_builder_get_initial_capacity(GArrowRecordBatchBuilder *builder);
+
+GARROW_AVAILABLE_IN_ALL
 void
 garrow_record_batch_builder_set_initial_capacity(GArrowRecordBatchBuilder *builder,
                                                  gint64 capacity);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_record_batch_builder_get_schema(GArrowRecordBatchBuilder *builder);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_13_FOR(garrow_record_batch_builder_get_n_columns)
 gint
 garrow_record_batch_builder_get_n_fields(GArrowRecordBatchBuilder *builder);
 #endif
+
 GARROW_AVAILABLE_IN_0_13
 gint
 garrow_record_batch_builder_get_n_columns(GArrowRecordBatchBuilder *builder);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_13_FOR(garrow_record_batch_builder_get_column_builder)
 GArrowArrayBuilder *
 garrow_record_batch_builder_get_field(GArrowRecordBatchBuilder *builder, gint i);
 #endif
+
 GARROW_AVAILABLE_IN_0_13
 GArrowArrayBuilder *
 garrow_record_batch_builder_get_column_builder(GArrowRecordBatchBuilder *builder, gint i);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatch *
 garrow_record_batch_builder_flush(GArrowRecordBatchBuilder *builder, GError **error);
diff --git a/c_glib/arrow-glib/table.h b/c_glib/arrow-glib/table.h
index 1bf64d25a4f3f..d790e413df5fc 100644
--- a/c_glib/arrow-glib/table.h
+++ b/c_glib/arrow-glib/table.h
@@ -29,6 +29,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_TABLE_CONCATENATE_OPTIONS \
   (garrow_table_concatenate_options_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTableConcatenateOptions,
                          garrow_table_concatenate_options,
                          GARROW,
@@ -44,6 +45,7 @@ GArrowTableConcatenateOptions *
 garrow_table_concatenate_options_new(void);
 
 #define GARROW_TYPE_TABLE (garrow_table_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTable, garrow_table, GARROW, TABLE, GObject)
 struct _GArrowTableClass
 {
@@ -53,18 +55,21 @@ struct _GArrowTableClass
 GARROW_AVAILABLE_IN_0_12
 GArrowTable *
 garrow_table_new_values(GArrowSchema *schema, GList *values, GError **error);
+
 GARROW_AVAILABLE_IN_0_15
 GArrowTable *
 garrow_table_new_chunked_arrays(GArrowSchema *schema,
                                 GArrowChunkedArray **chunked_arrays,
                                 gsize n_chunked_arrays,
                                 GError **error);
+
 GARROW_AVAILABLE_IN_0_12
 GArrowTable *
 garrow_table_new_arrays(GArrowSchema *schema,
                         GArrowArray **arrays,
                         gsize n_arrays,
                         GError **error);
+
 GARROW_AVAILABLE_IN_0_12
 GArrowTable *
 garrow_table_new_record_batches(GArrowSchema *schema,
@@ -72,22 +77,29 @@ garrow_table_new_record_batches(GArrowSchema *schema,
                                 gsize n_record_batches,
                                 GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_table_equal(GArrowTable *table, GArrowTable *other_table);
+
 GARROW_AVAILABLE_IN_0_17
 gboolean
 garrow_table_equal_metadata(GArrowTable *table,
                             GArrowTable *other_table,
                             gboolean check_metadata);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_table_get_schema(GArrowTable *table);
+
 GARROW_AVAILABLE_IN_0_15
 GArrowChunkedArray *
 garrow_table_get_column_data(GArrowTable *table, gint i);
 
+GARROW_AVAILABLE_IN_ALL
 guint
 garrow_table_get_n_columns(GArrowTable *table);
+
+GARROW_AVAILABLE_IN_ALL
 guint64
 garrow_table_get_n_rows(GArrowTable *table);
 
@@ -98,8 +110,11 @@ garrow_table_add_column(GArrowTable *table,
                         GArrowField *field,
                         GArrowChunkedArray *chunked_array,
                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTable *
 garrow_table_remove_column(GArrowTable *table, guint i, GError **error);
+
 GARROW_AVAILABLE_IN_0_15
 GArrowTable *
 garrow_table_replace_column(GArrowTable *table,
@@ -107,22 +122,28 @@ garrow_table_replace_column(GArrowTable *table,
                             GArrowField *field,
                             GArrowChunkedArray *chunked_array,
                             GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_table_to_string(GArrowTable *table, GError **error);
+
 GARROW_AVAILABLE_IN_0_14
 GArrowTable *
 garrow_table_concatenate(GArrowTable *table,
                          GList *other_tables,
                          GArrowTableConcatenateOptions *options,
                          GError **error);
+
 GARROW_AVAILABLE_IN_0_14
 GArrowTable *
 garrow_table_slice(GArrowTable *table, gint64 offset, gint64 length);
+
 GARROW_AVAILABLE_IN_0_16
 GArrowTable *
 garrow_table_combine_chunks(GArrowTable *table, GError **error);
 
 #define GARROW_TYPE_FEATHER_WRITE_PROPERTIES (garrow_feather_write_properties_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowFeatherWriteProperties,
                          garrow_feather_write_properties,
                          GARROW,
diff --git a/c_glib/arrow-glib/table.hpp b/c_glib/arrow-glib/table.hpp
index 3077c2ece9b37..79fc97471a42c 100644
--- a/c_glib/arrow-glib/table.hpp
+++ b/c_glib/arrow-glib/table.hpp
@@ -24,10 +24,14 @@
 #include <arrow-glib/table.h>
 
+GARROW_EXTERN
 GArrowTable *
 garrow_table_new_raw(std::shared_ptr<arrow::Table> *arrow_table);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::Table>
 garrow_table_get_raw(GArrowTable *table);
 
+GARROW_EXTERN
 arrow::ipc::feather::WriteProperties *
 garrow_feather_write_properties_get_raw(GArrowFeatherWriteProperties *properties);
diff --git a/c_glib/arrow-glib/tensor.h b/c_glib/arrow-glib/tensor.h
index a6d11b248110e..5971c3af12600 100644
--- a/c_glib/arrow-glib/tensor.h
+++ b/c_glib/arrow-glib/tensor.h
@@ -25,12 +25,14 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_TENSOR (garrow_tensor_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTensor, garrow_tensor, GARROW, TENSOR, GObject)
 struct _GArrowTensorClass
 {
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTensor *
 garrow_tensor_new(GArrowDataType *data_type,
                   GArrowBuffer *data,
@@ -40,30 +42,55 @@ garrow_tensor_new(GArrowDataType *data_type,
                   gsize n_strides,
                   gchar **dimension_names,
                   gsize n_dimension_names);
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_tensor_equal(GArrowTensor *tensor, GArrowTensor *other_tensor);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDataType *
 garrow_tensor_get_value_data_type(GArrowTensor *tensor);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowType
 garrow_tensor_get_value_type(GArrowTensor *tensor);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_tensor_get_buffer(GArrowTensor *tensor);
+
+GARROW_AVAILABLE_IN_ALL
 gint64 *
 garrow_tensor_get_shape(GArrowTensor *tensor, gint *n_dimensions);
+
+GARROW_AVAILABLE_IN_ALL
 gint64 *
 garrow_tensor_get_strides(GArrowTensor *tensor, gint *n_strides);
+
+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_tensor_get_n_dimensions(GArrowTensor *tensor);
+
+GARROW_AVAILABLE_IN_ALL
 const gchar *
 garrow_tensor_get_dimension_name(GArrowTensor *tensor, gint i);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_tensor_get_size(GArrowTensor *tensor);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_tensor_is_mutable(GArrowTensor *tensor);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_tensor_is_contiguous(GArrowTensor *tensor);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_tensor_is_row_major(GArrowTensor *tensor);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_tensor_is_column_major(GArrowTensor *tensor);
diff --git a/c_glib/arrow-glib/timestamp-parser.h b/c_glib/arrow-glib/timestamp-parser.h
index 05cad54746eeb..a7265d6ef46fb 100644
--- a/c_glib/arrow-glib/timestamp-parser.h
+++ b/c_glib/arrow-glib/timestamp-parser.h
@@ -26,6 +26,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_TIMESTAMP_PARSER (garrow_timestamp_parser_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowTimestampParser, garrow_timestamp_parser, GARROW, TIMESTAMP_PARSER, GObject)
 struct _GArrowTimestampParserClass
@@ -39,6 +40,7 @@ garrow_timestamp_parser_get_kind(GArrowTimestampParser *parser);
 
 #define GARROW_TYPE_STRPTIME_TIMESTAMP_PARSER \
   (garrow_strptime_timestamp_parser_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(GArrowStrptimeTimestampParser,
                          garrow_strptime_timestamp_parser,
                          GARROW,
@@ -58,6 +60,7 @@ const gchar *
 garrow_strptime_timestamp_parser_get_format(GArrowStrptimeTimestampParser *parser);
 
 #define GARROW_TYPE_ISO8601_TIMESTAMP_PARSER (garrow_iso8601_timestamp_parser_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(GArrowISO8601TimestampParser,
                          garrow_iso8601_timestamp_parser,
                          GARROW,
diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in
index a83c68a2a16dc..b530a088c8e38 100644
--- a/c_glib/arrow-glib/version.h.in
+++ b/c_glib/arrow-glib/version.h.in
@@ -19,6 +19,8 @@
 
 #pragma once
 
+#include <glib.h>
+
 /**
  * SECTION: version
  * @section_id: version-macros
@@ -36,7 +38,7 @@
  *
  * Since: 0.10.0
  */
-#define GARROW_VERSION_MAJOR (@GARROW_VERSION_MAJOR@)
+#define GARROW_VERSION_MAJOR (@VERSION_MAJOR@)
 
 /**
  * GARROW_VERSION_MINOR:
@@ -45,7 +47,7 @@
  *
  * Since: 0.10.0
  */
-#define GARROW_VERSION_MINOR (@GARROW_VERSION_MINOR@)
+#define GARROW_VERSION_MINOR (@VERSION_MINOR@)
 
 /**
  * GARROW_VERSION_MICRO:
@@ -54,7 +56,7 @@
  *
  * Since: 0.10.0
  */
-#define GARROW_VERSION_MICRO (@GARROW_VERSION_MICRO@)
+#define GARROW_VERSION_MICRO (@VERSION_MICRO@)
 
 /**
  * GARROW_VERSION_TAG:
@@ -64,7 +66,7 @@
  *
  * Since: 0.10.0
  */
-#define GARROW_VERSION_TAG "@GARROW_VERSION_TAG@"
+#define GARROW_VERSION_TAG "@VERSION_TAG@"
 
 /**
  * GARROW_VERSION_CHECK:
@@ -108,212 +110,7 @@
 #  define GARROW_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor)
 #endif
 
-/**
- * GARROW_VERSION_16_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 16.0.0
- */
-#define GARROW_VERSION_16_0 G_ENCODE_VERSION(16, 0)
-
-/**
- * GARROW_VERSION_15_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 15.0.0
- */
-#define GARROW_VERSION_15_0 G_ENCODE_VERSION(15, 0)
-
-/**
- * GARROW_VERSION_14_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 14.0.0
- */
-#define GARROW_VERSION_14_0 G_ENCODE_VERSION(14, 0)
-
-/**
- * GARROW_VERSION_13_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 13.0.0
- */
-#define GARROW_VERSION_13_0 G_ENCODE_VERSION(13, 0)
-
-/**
- * GARROW_VERSION_12_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 12.0.0
- */
-#define GARROW_VERSION_12_0 G_ENCODE_VERSION(12, 0)
-
-/**
- * GARROW_VERSION_11_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 11.0.0
- */
-#define GARROW_VERSION_11_0 G_ENCODE_VERSION(11, 0)
-
-/**
- * GARROW_VERSION_10_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 10.0.0
- */
-#define GARROW_VERSION_10_0 G_ENCODE_VERSION(10, 0)
-
-/**
- * GARROW_VERSION_9_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 9.0.0
- */
-#define GARROW_VERSION_9_0 G_ENCODE_VERSION(9, 0)
-
-/**
- * GARROW_VERSION_8_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 8.0.0
- */
-#define GARROW_VERSION_8_0 G_ENCODE_VERSION(8, 0)
-
-/**
- * GARROW_VERSION_7_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 7.0.0
- */
-#define GARROW_VERSION_7_0 G_ENCODE_VERSION(7, 0)
-
-/**
- * GARROW_VERSION_6_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 6.0.0
- */
-#define GARROW_VERSION_6_0 G_ENCODE_VERSION(6, 0)
-
-/**
- * GARROW_VERSION_5_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 5.0.0
- */
-#define GARROW_VERSION_5_0 G_ENCODE_VERSION(5, 0)
-
-/**
- * GARROW_VERSION_4_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 4.0.0
- */
-#define GARROW_VERSION_4_0 G_ENCODE_VERSION(4, 0)
-
-/**
- * GARROW_VERSION_3_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 3.0.0
- */
-#define GARROW_VERSION_3_0 G_ENCODE_VERSION(3, 0)
-
-/**
- * GARROW_VERSION_2_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 2.0.0
- */
-#define GARROW_VERSION_2_0 G_ENCODE_VERSION(2, 0)
-
-/**
- * GARROW_VERSION_1_0:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 1.0.0
- */
-#define GARROW_VERSION_1_0 G_ENCODE_VERSION(1, 0)
-
-/**
- * GARROW_VERSION_0_17:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 0.17.0
- */
-#define GARROW_VERSION_0_17 G_ENCODE_VERSION(0, 17)
-
-/**
- * GARROW_VERSION_0_16:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 0.16.0
- */
-#define GARROW_VERSION_0_16 G_ENCODE_VERSION(0, 16)
-
-/**
- * GARROW_VERSION_0_15:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 0.15.0
- */
-#define GARROW_VERSION_0_15 G_ENCODE_VERSION(0, 15)
-
-/**
- * GARROW_VERSION_0_14:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 0.14.0
- */
-#define GARROW_VERSION_0_14 G_ENCODE_VERSION(0, 14)
-
-/**
- * GARROW_VERSION_0_13:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 0.13.0
- */
-#define GARROW_VERSION_0_13 G_ENCODE_VERSION(0, 13)
-
-/**
- * GARROW_VERSION_0_12:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 0.12.0
- */
-#define GARROW_VERSION_0_12 G_ENCODE_VERSION(0, 12)
-
-/**
- * GARROW_VERSION_0_10:
- *
- * You can use this macro value for compile time API version check.
- *
- * Since: 0.10.0
- */
-#define GARROW_VERSION_0_10 G_ENCODE_VERSION(0, 10)
+@ENCODED_VERSIONS@
 
 /**
  * GARROW_VERSION_MIN_REQUIRED:
@@ -359,327 +156,6 @@
     G_ENCODE_VERSION(GARROW_VERSION_MAJOR, GARROW_VERSION_MINOR)
 #endif
 
+@VISIBILITY_MACROS@
 
-#define GARROW_AVAILABLE_IN_ALL
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_16_0
-#  define GARROW_DEPRECATED_IN_16_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_16_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_16_0
-#  define GARROW_DEPRECATED_IN_16_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_16_0
-#  define GARROW_AVAILABLE_IN_16_0 GARROW_UNAVAILABLE(16, 0)
-#else
-#  define GARROW_AVAILABLE_IN_16_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_15_0
-#  define GARROW_DEPRECATED_IN_15_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_15_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_15_0
-#  define GARROW_DEPRECATED_IN_15_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_15_0
-#  define GARROW_AVAILABLE_IN_15_0 GARROW_UNAVAILABLE(15, 0)
-#else
-#  define GARROW_AVAILABLE_IN_15_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_14_0
-#  define GARROW_DEPRECATED_IN_14_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_14_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_14_0
-#  define GARROW_DEPRECATED_IN_14_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_14_0
-#  define GARROW_AVAILABLE_IN_14_0 GARROW_UNAVAILABLE(14, 0)
-#else
-#  define GARROW_AVAILABLE_IN_14_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_13_0
-#  define GARROW_DEPRECATED_IN_13_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_13_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_13_0
-#  define GARROW_DEPRECATED_IN_13_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_13_0
-#  define GARROW_AVAILABLE_IN_13_0 GARROW_UNAVAILABLE(13, 0)
-#else
-#  define GARROW_AVAILABLE_IN_13_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_12_0
-#  define GARROW_DEPRECATED_IN_12_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_12_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_12_0
-#  define GARROW_DEPRECATED_IN_12_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_12_0
-#  define GARROW_AVAILABLE_IN_12_0 GARROW_UNAVAILABLE(12, 0)
-#else
-#  define GARROW_AVAILABLE_IN_12_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_11_0
-#  define GARROW_DEPRECATED_IN_11_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_11_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_11_0
-#  define GARROW_DEPRECATED_IN_11_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_11_0
-#  define GARROW_AVAILABLE_IN_11_0 GARROW_UNAVAILABLE(11, 0)
-#else
-#  define GARROW_AVAILABLE_IN_11_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_10_0
-#  define GARROW_DEPRECATED_IN_10_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_10_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_10_0
-#  define GARROW_DEPRECATED_IN_10_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_10_0
-#  define GARROW_AVAILABLE_IN_10_0 GARROW_UNAVAILABLE(10, 0)
-#else
-#  define GARROW_AVAILABLE_IN_10_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_9_0
-#  define GARROW_DEPRECATED_IN_9_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_9_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_9_0
-#  define GARROW_DEPRECATED_IN_9_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_9_0
-#  define GARROW_AVAILABLE_IN_9_0 GARROW_UNAVAILABLE(9, 0)
-#else
-#  define GARROW_AVAILABLE_IN_9_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_8_0
-#  define GARROW_DEPRECATED_IN_8_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_8_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_8_0
-#  define GARROW_DEPRECATED_IN_8_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_8_0
-#  define GARROW_AVAILABLE_IN_8_0 GARROW_UNAVAILABLE(8, 0)
-#else
-#  define GARROW_AVAILABLE_IN_8_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_7_0
-#  define GARROW_DEPRECATED_IN_7_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_7_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_7_0
-#  define GARROW_DEPRECATED_IN_7_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_7_0
-#  define GARROW_AVAILABLE_IN_7_0 GARROW_UNAVAILABLE(7, 0)
-#else
-#  define GARROW_AVAILABLE_IN_7_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_6_0
-#  define GARROW_DEPRECATED_IN_6_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_6_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_6_0
-#  define GARROW_DEPRECATED_IN_6_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_6_0
-#  define GARROW_AVAILABLE_IN_6_0 GARROW_UNAVAILABLE(6, 0)
-#else
-#  define GARROW_AVAILABLE_IN_6_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_5_0
-#  define GARROW_DEPRECATED_IN_5_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_5_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_5_0
-#  define GARROW_DEPRECATED_IN_5_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_5_0
-#  define GARROW_AVAILABLE_IN_5_0 GARROW_UNAVAILABLE(5, 0)
-#else
-#  define GARROW_AVAILABLE_IN_5_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_4_0
-#  define GARROW_DEPRECATED_IN_4_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_4_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_4_0
-#  define GARROW_DEPRECATED_IN_4_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_4_0
-#  define GARROW_AVAILABLE_IN_4_0 GARROW_UNAVAILABLE(4, 0)
-#else
-#  define GARROW_AVAILABLE_IN_4_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_3_0
-#  define GARROW_DEPRECATED_IN_3_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_3_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_3_0
-#  define GARROW_DEPRECATED_IN_3_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_3_0
-#  define GARROW_AVAILABLE_IN_3_0 GARROW_UNAVAILABLE(3, 0)
-#else
-#  define GARROW_AVAILABLE_IN_3_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_2_0
-#  define GARROW_DEPRECATED_IN_2_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_2_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_2_0
-#  define GARROW_DEPRECATED_IN_2_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_2_0
-#  define GARROW_AVAILABLE_IN_2_0 GARROW_UNAVAILABLE(2, 0)
-#else
-#  define GARROW_AVAILABLE_IN_2_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_1_0
-#  define GARROW_DEPRECATED_IN_1_0 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_1_0_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_1_0
-#  define GARROW_DEPRECATED_IN_1_0_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_1_0
-#  define GARROW_AVAILABLE_IN_1_0 GARROW_UNAVAILABLE(1, 0)
-#else
-#  define GARROW_AVAILABLE_IN_1_0
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_17
-#  define GARROW_DEPRECATED_IN_0_17 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_0_17_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_0_17
-#  define GARROW_DEPRECATED_IN_0_17_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_17
-#  define GARROW_AVAILABLE_IN_0_17 GARROW_UNAVAILABLE(0, 17)
-#else
-#  define GARROW_AVAILABLE_IN_0_17
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_16
-#  define GARROW_DEPRECATED_IN_0_16 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_0_16_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_0_16
-#  define GARROW_DEPRECATED_IN_0_16_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_16
-#  define GARROW_AVAILABLE_IN_0_16 GARROW_UNAVAILABLE(0, 16)
-#else
-#  define GARROW_AVAILABLE_IN_0_16
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_15
-#  define GARROW_DEPRECATED_IN_0_15 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_0_15_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_0_15
-#  define GARROW_DEPRECATED_IN_0_15_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_15
-#  define GARROW_AVAILABLE_IN_0_15 GARROW_UNAVAILABLE(0, 15)
-#else
-#  define GARROW_AVAILABLE_IN_0_15
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_14
-#  define GARROW_DEPRECATED_IN_0_14 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_0_14_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_0_14
-#  define GARROW_DEPRECATED_IN_0_14_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_14
-#  define GARROW_AVAILABLE_IN_0_14 GARROW_UNAVAILABLE(0, 14)
-#else
-#  define GARROW_AVAILABLE_IN_0_14
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_13
-#  define GARROW_DEPRECATED_IN_0_13 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_0_13_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_0_13
-#  define GARROW_DEPRECATED_IN_0_13_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_13
-#  define GARROW_AVAILABLE_IN_0_13 GARROW_UNAVAILABLE(0, 13)
-#else
-#  define GARROW_AVAILABLE_IN_0_13
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_12
-#  define GARROW_DEPRECATED_IN_0_12 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_0_12_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_0_12
-#  define GARROW_DEPRECATED_IN_0_12_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_12
-#  define GARROW_AVAILABLE_IN_0_12 GARROW_UNAVAILABLE(0, 12)
-#else
-#  define GARROW_AVAILABLE_IN_0_12
-#endif
-
-#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_10
-#  define GARROW_DEPRECATED_IN_0_10 GARROW_DEPRECATED
-#  define GARROW_DEPRECATED_IN_0_10_FOR(function) GARROW_DEPRECATED_FOR(function)
-#else
-#  define GARROW_DEPRECATED_IN_0_10
-#  define GARROW_DEPRECATED_IN_0_10_FOR(function)
-#endif
-
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_10 -# define GARROW_AVAILABLE_IN_0_10 GARROW_UNAVAILABLE(0, 10) -#else -# define GARROW_AVAILABLE_IN_0_10 -#endif +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-glib/writable-file.h b/c_glib/arrow-glib/writable-file.h index 555705767e4aa..e9aa9122e92fa 100644 --- a/c_glib/arrow-glib/writable-file.h +++ b/c_glib/arrow-glib/writable-file.h @@ -24,9 +24,11 @@ G_BEGIN_DECLS #define GARROW_TYPE_WRITABLE_FILE (garrow_writable_file_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_INTERFACE( GArrowWritableFile, garrow_writable_file, GARROW, WRITABLE_FILE, GObject) +GARROW_AVAILABLE_IN_ALL gboolean garrow_writable_file_write_at(GArrowWritableFile *writable_file, gint64 position, diff --git a/c_glib/arrow-glib/writable.h b/c_glib/arrow-glib/writable.h index a556443967b5a..dcc1e67668e78 100644 --- a/c_glib/arrow-glib/writable.h +++ b/c_glib/arrow-glib/writable.h @@ -24,13 +24,17 @@ G_BEGIN_DECLS #define GARROW_TYPE_WRITABLE (garrow_writable_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_INTERFACE(GArrowWritable, garrow_writable, GARROW, WRITABLE, GObject) +GARROW_AVAILABLE_IN_ALL gboolean garrow_writable_write(GArrowWritable *writable, const guint8 *data, gint64 n_bytes, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_writable_flush(GArrowWritable *writable, GError **error); diff --git a/c_glib/arrow-glib/writer.h b/c_glib/arrow-glib/writer.h index 30b0ea987da39..46bbdddec8c9d 100644 --- a/c_glib/arrow-glib/writer.h +++ b/c_glib/arrow-glib/writer.h @@ -28,6 +28,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_RECORD_BATCH_WRITER (garrow_record_batch_writer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchWriter, garrow_record_batch_writer, GARROW, @@ -38,19 +39,23 @@ struct _GArrowRecordBatchWriterClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_write_record_batch(GArrowRecordBatchWriter *writer, GArrowRecordBatch *record_batch, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_write_table(GArrowRecordBatchWriter *writer, GArrowTable *table, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_close(GArrowRecordBatchWriter *writer, GError **error); #define GARROW_TYPE_RECORD_BATCH_STREAM_WRITER \ (garrow_record_batch_stream_writer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchStreamWriter, garrow_record_batch_stream_writer, GARROW, @@ -61,12 +66,14 @@ struct _GArrowRecordBatchStreamWriterClass GArrowRecordBatchWriterClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchStreamWriter * garrow_record_batch_stream_writer_new(GArrowOutputStream *sink, GArrowSchema *schema, GError **error); #define GARROW_TYPE_RECORD_BATCH_FILE_WRITER (garrow_record_batch_file_writer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchFileWriter, garrow_record_batch_file_writer, GARROW, @@ -77,6 +84,7 @@ struct _GArrowRecordBatchFileWriterClass GArrowRecordBatchStreamWriterClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchFileWriter * garrow_record_batch_file_writer_new(GArrowOutputStream *sink, GArrowSchema *schema, diff --git a/c_glib/gandiva-glib/expression.h b/c_glib/gandiva-glib/expression.h index f8f061ceb08fa..bb7eb22ac01dc 100644 --- a/c_glib/gandiva-glib/expression.h +++ b/c_glib/gandiva-glib/expression.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_EXPRESSION (ggandiva_expression_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 
G_DECLARE_DERIVABLE_TYPE( GGandivaExpression, ggandiva_expression, GGANDIVA, EXPRESSION, GObject) @@ -34,12 +35,16 @@ struct _GGandivaExpressionClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaExpression * ggandiva_expression_new(GGandivaNode *root_node, GArrowField *result_field); + +GGANDIVA_AVAILABLE_IN_0_12 gchar * ggandiva_expression_to_string(GGandivaExpression *expression); #define GGANDIVA_TYPE_CONDITION (ggandiva_condition_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE( GGandivaCondition, ggandiva_condition, GGANDIVA, CONDITION, GGandivaExpression) diff --git a/c_glib/gandiva-glib/filter.h b/c_glib/gandiva-glib/filter.h index b95981198e0c4..0a2199ccfa106 100644 --- a/c_glib/gandiva-glib/filter.h +++ b/c_glib/gandiva-glib/filter.h @@ -25,6 +25,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_FILTER (ggandiva_filter_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaFilter, ggandiva_filter, GGANDIVA, FILTER, GObject) struct _GGandivaFilterClass @@ -32,8 +33,11 @@ struct _GGandivaFilterClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_4_0 GGandivaFilter * ggandiva_filter_new(GArrowSchema *schema, GGandivaCondition *condition, GError **error); + +GGANDIVA_AVAILABLE_IN_4_0 gboolean ggandiva_filter_evaluate(GGandivaFilter *filter, GArrowRecordBatch *record_batch, diff --git a/c_glib/gandiva-glib/function-registry.h b/c_glib/gandiva-glib/function-registry.h index ed21e120a2533..e13f4b36d28dc 100644 --- a/c_glib/gandiva-glib/function-registry.h +++ b/c_glib/gandiva-glib/function-registry.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_FUNCTION_REGISTRY (ggandiva_function_registry_get_type()) +GGANDIVA_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GGandivaFunctionRegistry, ggandiva_function_registry, GGANDIVA, @@ -35,14 +36,20 @@ struct _GGandivaFunctionRegistryClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_15_0 +GGANDIVA_AVAILABLE_IN_15_0 GGandivaFunctionRegistry * ggandiva_function_registry_default(void); + +GGANDIVA_AVAILABLE_IN_14_0 GGandivaFunctionRegistry * ggandiva_function_registry_new(void); + +GGANDIVA_AVAILABLE_IN_14_0 GGandivaNativeFunction * ggandiva_function_registry_lookup(GGandivaFunctionRegistry *function_registry, GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 GList * ggandiva_function_registry_get_native_functions( GGandivaFunctionRegistry *function_registry); diff --git a/c_glib/gandiva-glib/function-signature.h b/c_glib/gandiva-glib/function-signature.h index ef6834ea85723..4fd8cc8a7e761 100644 --- a/c_glib/gandiva-glib/function-signature.h +++ b/c_glib/gandiva-glib/function-signature.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GGANDIVA_TYPE_FUNCTION_SIGNATURE (ggandiva_function_signature_get_type()) +GGANDIVA_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GGandivaFunctionSignature, ggandiva_function_signature, GGANDIVA, @@ -35,20 +38,31 @@ struct _GGandivaFunctionSignatureClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_14_0 GGandivaFunctionSignature * ggandiva_function_signature_new(const gchar *base_name, GList *parameter_types, GArrowDataType *return_type); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_function_signature_equal(GGandivaFunctionSignature *function_signature, GGandivaFunctionSignature *other_function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 gchar * ggandiva_function_signature_to_string(GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 GArrowDataType * 
ggandiva_function_signature_get_return_type( GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 gchar * ggandiva_function_signature_get_base_name(GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 GList * ggandiva_function_signature_get_param_types( GGandivaFunctionSignature *function_signature); diff --git a/c_glib/gandiva-glib/meson.build b/c_glib/gandiva-glib/meson.build index d5cab109dcf89..8cd00b3805b91 100644 --- a/c_glib/gandiva-glib/meson.build +++ b/c_glib/gandiva-glib/meson.build @@ -53,14 +53,12 @@ cpp_headers = files( 'selection-vector.hpp', ) -version_h_conf = configuration_data() -version_h_conf.set('GGANDIVA_VERSION_MAJOR', version_major) -version_h_conf.set('GGANDIVA_VERSION_MINOR', version_minor) -version_h_conf.set('GGANDIVA_VERSION_MICRO', version_micro) -version_h_conf.set('GGANDIVA_VERSION_TAG', version_tag) -version_h = configure_file(input: 'version.h.in', - output: 'version.h', - configuration: version_h_conf) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GGANDIVA', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + c_headers += version_h enums = gnome.mkenums('enums', @@ -87,6 +85,7 @@ libgandiva_glib = library('gandiva-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGGANDIVA_COMPILATION'], soversion: so_version, version: library_version) gandiva_glib = declare_dependency(link_with: libgandiva_glib, diff --git a/c_glib/gandiva-glib/native-function.h b/c_glib/gandiva-glib/native-function.h index 5ceef396ef40c..934d29ab7e33b 100644 --- a/c_glib/gandiva-glib/native-function.h +++ b/c_glib/gandiva-glib/native-function.h @@ -40,6 +40,7 @@ typedef enum { } GGandivaResultNullableType; #define GGANDIVA_TYPE_NATIVE_FUNCTION (ggandiva_native_function_get_type()) +GGANDIVA_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE( GGandivaNativeFunction, ggandiva_native_function, GGANDIVA, NATIVE_FUNCTION, GObject) @@ -48,20 +49,33 @@ struct _GGandivaNativeFunctionClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_14_0 GList * ggandiva_native_function_get_signatures(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_equal(GGandivaNativeFunction *native_function, GGandivaNativeFunction *other_native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gchar * ggandiva_native_function_to_string(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 GGandivaResultNullableType ggandiva_native_function_get_result_nullable_type( GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_need_context(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_need_function_holder(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_can_return_errors(GGandivaNativeFunction *native_function); diff --git a/c_glib/gandiva-glib/node.h b/c_glib/gandiva-glib/node.h index 715a3d6ebaf18..1733cac918c51 100644 --- a/c_glib/gandiva-glib/node.h +++ b/c_glib/gandiva-glib/node.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_NODE (ggandiva_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaNode, ggandiva_node, GGANDIVA, NODE, GObject) struct _GGandivaNodeClass @@ -33,10 +34,12 @@ struct _GGandivaNodeClass 
GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_16 gchar * ggandiva_node_to_string(GGandivaNode *node); #define GGANDIVA_TYPE_FIELD_NODE (ggandiva_field_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaFieldNode, ggandiva_field_node, GGANDIVA, FIELD_NODE, GGandivaNode) struct _GGandivaFieldNodeClass @@ -44,10 +47,12 @@ struct _GGandivaFieldNodeClass GGandivaNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFieldNode * ggandiva_field_node_new(GArrowField *field); #define GGANDIVA_TYPE_FUNCTION_NODE (ggandiva_function_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaFunctionNode, ggandiva_function_node, GGANDIVA, FUNCTION_NODE, GGandivaNode) struct _GGandivaFunctionNodeClass @@ -55,14 +60,18 @@ struct _GGandivaFunctionNodeClass GGandivaNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFunctionNode * ggandiva_function_node_new(const gchar *name, GList *parameters, GArrowDataType *return_type); + +GGANDIVA_AVAILABLE_IN_0_12 GList * ggandiva_function_node_get_parameters(GGandivaFunctionNode *node); #define GGANDIVA_TYPE_LITERAL_NODE (ggandiva_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaLiteralNode, ggandiva_literal_node, GGANDIVA, LITERAL_NODE, GGandivaNode) struct _GGandivaLiteralNodeClass @@ -71,6 +80,7 @@ struct _GGandivaLiteralNodeClass }; #define GGANDIVA_TYPE_NULL_LITERAL_NODE (ggandiva_null_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaNullLiteralNode, ggandiva_null_literal_node, GGANDIVA, @@ -81,10 +91,12 @@ struct _GGandivaNullLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaNullLiteralNode * ggandiva_null_literal_node_new(GArrowDataType *return_type, GError **error); #define GGANDIVA_TYPE_BOOLEAN_LITERAL_NODE (ggandiva_boolean_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaBooleanLiteralNode, ggandiva_boolean_literal_node, GGANDIVA, @@ -95,12 +107,16 @@ struct _GGandivaBooleanLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBooleanLiteralNode * ggandiva_boolean_literal_node_new(gboolean value); + +GGANDIVA_AVAILABLE_IN_0_12 gboolean ggandiva_boolean_literal_node_get_value(GGandivaBooleanLiteralNode *node); #define GGANDIVA_TYPE_INT8_LITERAL_NODE (ggandiva_int8_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt8LiteralNode, ggandiva_int8_literal_node, GGANDIVA, @@ -111,12 +127,16 @@ struct _GGandivaInt8LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt8LiteralNode * ggandiva_int8_literal_node_new(gint8 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint8 ggandiva_int8_literal_node_get_value(GGandivaInt8LiteralNode *node); #define GGANDIVA_TYPE_UINT8_LITERAL_NODE (ggandiva_uint8_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt8LiteralNode, ggandiva_uint8_literal_node, GGANDIVA, @@ -127,12 +147,16 @@ struct _GGandivaUInt8LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt8LiteralNode * ggandiva_uint8_literal_node_new(guint8 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint8 ggandiva_uint8_literal_node_get_value(GGandivaUInt8LiteralNode *node); #define GGANDIVA_TYPE_INT16_LITERAL_NODE (ggandiva_int16_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt16LiteralNode, 
ggandiva_int16_literal_node, GGANDIVA, @@ -143,12 +167,16 @@ struct _GGandivaInt16LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt16LiteralNode * ggandiva_int16_literal_node_new(gint16 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint16 ggandiva_int16_literal_node_get_value(GGandivaInt16LiteralNode *node); #define GGANDIVA_TYPE_UINT16_LITERAL_NODE (ggandiva_uint16_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt16LiteralNode, ggandiva_uint16_literal_node, GGANDIVA, @@ -159,12 +187,16 @@ struct _GGandivaUInt16LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt16LiteralNode * ggandiva_uint16_literal_node_new(guint16 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint16 ggandiva_uint16_literal_node_get_value(GGandivaUInt16LiteralNode *node); #define GGANDIVA_TYPE_INT32_LITERAL_NODE (ggandiva_int32_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt32LiteralNode, ggandiva_int32_literal_node, GGANDIVA, @@ -175,12 +207,16 @@ struct _GGandivaInt32LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt32LiteralNode * ggandiva_int32_literal_node_new(gint32 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint32 ggandiva_int32_literal_node_get_value(GGandivaInt32LiteralNode *node); #define GGANDIVA_TYPE_UINT32_LITERAL_NODE (ggandiva_uint32_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt32LiteralNode, ggandiva_uint32_literal_node, GGANDIVA, @@ -191,12 +227,16 @@ struct _GGandivaUInt32LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt32LiteralNode * ggandiva_uint32_literal_node_new(guint32 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint32 ggandiva_uint32_literal_node_get_value(GGandivaUInt32LiteralNode *node); #define GGANDIVA_TYPE_INT64_LITERAL_NODE (ggandiva_int64_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt64LiteralNode, ggandiva_int64_literal_node, GGANDIVA, @@ -207,12 +247,16 @@ struct _GGandivaInt64LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt64LiteralNode * ggandiva_int64_literal_node_new(gint64 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint64 ggandiva_int64_literal_node_get_value(GGandivaInt64LiteralNode *node); #define GGANDIVA_TYPE_UINT64_LITERAL_NODE (ggandiva_uint64_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt64LiteralNode, ggandiva_uint64_literal_node, GGANDIVA, @@ -223,12 +267,16 @@ struct _GGandivaUInt64LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt64LiteralNode * ggandiva_uint64_literal_node_new(guint64 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint64 ggandiva_uint64_literal_node_get_value(GGandivaUInt64LiteralNode *node); #define GGANDIVA_TYPE_FLOAT_LITERAL_NODE (ggandiva_float_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaFloatLiteralNode, ggandiva_float_literal_node, GGANDIVA, @@ -239,12 +287,16 @@ struct _GGandivaFloatLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFloatLiteralNode * ggandiva_float_literal_node_new(gfloat value); + +GGANDIVA_AVAILABLE_IN_0_12 gfloat ggandiva_float_literal_node_get_value(GGandivaFloatLiteralNode *node); #define GGANDIVA_TYPE_DOUBLE_LITERAL_NODE (ggandiva_double_literal_node_get_type()) 
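(Editor's aside, not part of the patch.) Each GGANDIVA_AVAILABLE_IN_0_12 annotation added in this header resolves through the generated version.h. Judging from the hand-written blocks this patch deletes from version.h.in below, the 0.12 variant is generated roughly as in this sketch; the export attribute that @VISIBILITY_MACROS@ additionally folds in when GGANDIVA_COMPILATION is defined (now passed via cpp_args in meson.build) is assumed and not spelled out here:

#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_0_12
/* Consumers that cap the allowed API below 0.12 get a compile-time
 * warning when they use any symbol carrying this annotation. */
#  define GGANDIVA_AVAILABLE_IN_0_12 GGANDIVA_UNAVAILABLE(0, 12)
#else
#  define GGANDIVA_AVAILABLE_IN_0_12
#endif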
+GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaDoubleLiteralNode, ggandiva_double_literal_node, GGANDIVA, @@ -255,12 +307,16 @@ struct _GGandivaDoubleLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaDoubleLiteralNode * ggandiva_double_literal_node_new(gdouble value); + +GGANDIVA_AVAILABLE_IN_0_12 gdouble ggandiva_double_literal_node_get_value(GGandivaDoubleLiteralNode *node); #define GGANDIVA_TYPE_BINARY_LITERAL_NODE (ggandiva_binary_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaBinaryLiteralNode, ggandiva_binary_literal_node, GGANDIVA, @@ -271,14 +327,20 @@ struct _GGandivaBinaryLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBinaryLiteralNode * ggandiva_binary_literal_node_new(const guint8 *value, gsize size); + +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBinaryLiteralNode * ggandiva_binary_literal_node_new_bytes(GBytes *value); + +GGANDIVA_AVAILABLE_IN_0_12 GBytes * ggandiva_binary_literal_node_get_value(GGandivaBinaryLiteralNode *node); #define GGANDIVA_TYPE_STRING_LITERAL_NODE (ggandiva_string_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaStringLiteralNode, ggandiva_string_literal_node, GGANDIVA, @@ -289,12 +351,16 @@ struct _GGandivaStringLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaStringLiteralNode * ggandiva_string_literal_node_new(const gchar *value); + +GGANDIVA_AVAILABLE_IN_0_12 const gchar * ggandiva_string_literal_node_get_value(GGandivaStringLiteralNode *node); #define GGANDIVA_TYPE_IF_NODE (ggandiva_if_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaIfNode, ggandiva_if_node, GGANDIVA, IF_NODE, GGandivaNode) struct _GGandivaIfNodeClass @@ -302,6 +368,7 @@ struct _GGandivaIfNodeClass GGandivaNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaIfNode * ggandiva_if_node_new(GGandivaNode *condition_node, GGandivaNode *then_node, @@ -310,6 +377,7 @@ ggandiva_if_node_new(GGandivaNode *condition_node, GError **error); #define GGANDIVA_TYPE_BOOLEAN_NODE (ggandiva_boolean_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GGandivaBooleanNode, ggandiva_boolean_node, GGANDIVA, BOOLEAN_NODE, GGandivaNode) @@ -323,6 +391,7 @@ GList * ggandiva_boolean_node_get_children(GGandivaBooleanNode *node); #define GGANDIVA_TYPE_AND_NODE (ggandiva_and_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GGandivaAndNode, ggandiva_and_node, GGANDIVA, AND_NODE, GGandivaBooleanNode) struct _GGandivaAndNodeClass @@ -335,6 +404,7 @@ GGandivaAndNode * ggandiva_and_node_new(GList *children); #define GGANDIVA_TYPE_OR_NODE (ggandiva_or_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GGandivaOrNode, ggandiva_or_node, GGANDIVA, OR_NODE, GGandivaBooleanNode) struct _GGandivaOrNodeClass diff --git a/c_glib/gandiva-glib/projector.h b/c_glib/gandiva-glib/projector.h index e0afec5cb1ba1..5fbf9c290beab 100644 --- a/c_glib/gandiva-glib/projector.h +++ b/c_glib/gandiva-glib/projector.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_PROJECTOR (ggandiva_projector_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaProjector, ggandiva_projector, GGANDIVA, PROJECTOR, GObject) @@ -32,14 +33,18 @@ struct _GGandivaProjectorClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaProjector * ggandiva_projector_new(GArrowSchema *schema, GList *expressions, 
GError **error); + +GGANDIVA_AVAILABLE_IN_0_12 GList * ggandiva_projector_evaluate(GGandivaProjector *projector, GArrowRecordBatch *record_batch, GError **error); #define GGANDIVA_TYPE_SELECTABLE_PROJECTOR (ggandiva_selectable_projector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaSelectableProjector, ggandiva_selectable_projector, GGANDIVA, diff --git a/c_glib/gandiva-glib/selection-vector.h b/c_glib/gandiva-glib/selection-vector.h index 6d78192e35e28..558b9b950cf84 100644 --- a/c_glib/gandiva-glib/selection-vector.h +++ b/c_glib/gandiva-glib/selection-vector.h @@ -47,6 +47,7 @@ typedef enum { } GGandivaSelectionVectorMode; #define GGANDIVA_TYPE_SELECTION_VECTOR (ggandiva_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE( GGandivaSelectionVector, ggandiva_selection_vector, GGANDIVA, SELECTION_VECTOR, GObject) @@ -65,6 +66,7 @@ ggandiva_selection_vector_to_array(GGandivaSelectionVector *selection_vector); #define GGANDIVA_TYPE_UINT16_SELECTION_VECTOR \ (ggandiva_uint16_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt16SelectionVector, ggandiva_uint16_selection_vector, GGANDIVA, @@ -82,6 +84,7 @@ ggandiva_uint16_selection_vector_new(gint64 max_slots, GError **error); #define GGANDIVA_TYPE_UINT32_SELECTION_VECTOR \ (ggandiva_uint32_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt32SelectionVector, ggandiva_uint32_selection_vector, GGANDIVA, @@ -99,6 +102,7 @@ ggandiva_uint32_selection_vector_new(gint64 max_slots, GError **error); #define GGANDIVA_TYPE_UINT64_SELECTION_VECTOR \ (ggandiva_uint64_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt64SelectionVector, ggandiva_uint64_selection_vector, GGANDIVA, diff --git a/c_glib/gandiva-glib/version.h.in b/c_glib/gandiva-glib/version.h.in index 3c9e87c9d52e1..857c7367bd7e2 100644 --- a/c_glib/gandiva-glib/version.h.in +++ b/c_glib/gandiva-glib/version.h.in @@ -38,7 +38,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_MAJOR (@GGANDIVA_VERSION_MAJOR@) +#define GGANDIVA_VERSION_MAJOR (@VERSION_MAJOR@) /** * GGANDIVA_VERSION_MINOR: @@ -47,7 +47,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_MINOR (@GGANDIVA_VERSION_MINOR@) +#define GGANDIVA_VERSION_MINOR (@VERSION_MINOR@) /** * GGANDIVA_VERSION_MICRO: @@ -56,7 +56,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_MICRO (@GGANDIVA_VERSION_MICRO@) +#define GGANDIVA_VERSION_MICRO (@VERSION_MICRO@) /** * GGANDIVA_VERSION_TAG: @@ -66,7 +66,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_TAG "@GGANDIVA_VERSION_TAG@" +#define GGANDIVA_VERSION_TAG "@VERSION_TAG@" /** * GGANDIVA_VERSION_CHECK: @@ -110,23 +110,7 @@ # define GGANDIVA_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) #endif -/** - * GGANDIVA_VERSION_1_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 1.0.0 - */ -#define GGANDIVA_VERSION_1_0 G_ENCODE_VERSION(1, 0) - -/** - * GGANDIVA_VERSION_4_0: - * - * You can use this macro value for compile time API version check. 
- * - * Since: 4.0.0 - */ -#define GGANDIVA_VERSION_4_0 G_ENCODE_VERSION(4, 0) +@ENCODED_VERSIONS@ /** * GGANDIVA_VERSION_MIN_REQUIRED: @@ -172,47 +156,6 @@ G_ENCODE_VERSION(GGANDIVA_VERSION_MAJOR, GGANDIVA_VERSION_MINOR) #endif +@VISIBILITY_MACROS@ -#define GGANDIVA_AVAILABLE_IN_ALL - -#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_4_0 -# define GGANDIVA_DEPRECATED_IN_4_0 GGANDIVA_DEPRECATED -# define GGANDIVA_DEPRECATED_IN_4_0_FOR(function) GGANDIVA_DEPRECATED_FOR(function) -#else -# define GGANDIVA_DEPRECATED_IN_4_0 -# define GGANDIVA_DEPRECATED_IN_4_0_FOR(function) -#endif - -#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_4_0 -# define GGANDIVA_AVAILABLE_IN_4_0 GGANDIVA_UNAVAILABLE(4, 0) -#else -# define GGANDIVA_AVAILABLE_IN_4_0 -#endif - -#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_1_0 -# define GGANDIVA_DEPRECATED_IN_1_0 GGANDIVA_DEPRECATED -# define GGANDIVA_DEPRECATED_IN_1_0_FOR(function) GGANDIVA_DEPRECATED_FOR(function) -#else -# define GGANDIVA_DEPRECATED_IN_1_0 -# define GGANDIVA_DEPRECATED_IN_1_0_FOR(function) -#endif - -#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_1_0 -# define GGANDIVA_AVAILABLE_IN_1_0 GGANDIVA_UNAVAILABLE(1, 0) -#else -# define GGANDIVA_AVAILABLE_IN_1_0 -#endif - -#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_0_17 -# define GGANDIVA_DEPRECATED_IN_0_17 GGANDIVA_DEPRECATED -# define GGANDIVA_DEPRECATED_IN_0_17_FOR(function) GGANDIVA_DEPRECATED_FOR(function) -#else -# define GGANDIVA_DEPRECATED_IN_0_17 -# define GGANDIVA_DEPRECATED_IN_0_17_FOR(function) -#endif - -#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_0_17 -# define GGANDIVA_AVAILABLE_IN_0_17 GGANDIVA_UNAVAILABLE(0, 17) -#else -# define GGANDIVA_AVAILABLE_IN_0_17 -#endif +@AVAILABILITY_MACROS@ diff --git a/c_glib/meson.build b/c_glib/meson.build index 16a5ea7ccb432..06aa5b941e77c 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -26,8 +26,6 @@ project('arrow-glib', 'c', 'cpp', # Debian: # https://packages.debian.org/search?keywords=meson # - # * bullseye: 0.56.2 - # * bullseye-backports:1.0.0 # * bookworm: 1.0.0 # # Ubuntu: @@ -173,6 +171,10 @@ if cxx.get_id() != 'msvc' endif add_project_arguments(cxx.get_supported_arguments(cxx_flags), language: 'cpp') +python = import('python') +python3 = python.find_installation('python3') +generate_version_header_py = project_source_root / 'tool' / 'generate-version-header.py' + subdir('arrow-glib') if arrow_cuda.found() subdir('arrow-cuda-glib') diff --git a/c_glib/parquet-glib/arrow-file-reader.h b/c_glib/parquet-glib/arrow-file-reader.h index 63c14ac71da86..52d7293bad0fa 100644 --- a/c_glib/parquet-glib/arrow-file-reader.h +++ b/c_glib/parquet-glib/arrow-file-reader.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GPARQUET_TYPE_ARROW_FILE_READER (gparquet_arrow_file_reader_get_type()) +GPARQUET_AVAILABLE_IN_0_11 G_DECLARE_DERIVABLE_TYPE(GParquetArrowFileReader, gparquet_arrow_file_reader, GPARQUET, @@ -34,15 +35,19 @@ struct _GParquetArrowFileReaderClass GObjectClass parent_class; }; +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileReader * gparquet_arrow_file_reader_new_arrow(GArrowSeekableInputStream *source, GError **error); + +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileReader * gparquet_arrow_file_reader_new_path(const gchar *path, GError **error); +GPARQUET_AVAILABLE_IN_0_11 GArrowTable * gparquet_arrow_file_reader_read_table(GParquetArrowFileReader *reader, GError **error); -GARROW_AVAILABLE_IN_1_0 +GPARQUET_AVAILABLE_IN_1_0 GArrowTable * gparquet_arrow_file_reader_read_row_group(GParquetArrowFileReader *reader, 
gint row_group_index, @@ -50,26 +55,30 @@ gparquet_arrow_file_reader_read_row_group(GParquetArrowFileReader *reader, gsize n_column_indices, GError **error); +GPARQUET_AVAILABLE_IN_0_12 GArrowSchema * gparquet_arrow_file_reader_get_schema(GParquetArrowFileReader *reader, GError **error); +GPARQUET_AVAILABLE_IN_0_15 GArrowChunkedArray * gparquet_arrow_file_reader_read_column_data(GParquetArrowFileReader *reader, gint i, GError **error); +GPARQUET_AVAILABLE_IN_0_11 gint gparquet_arrow_file_reader_get_n_row_groups(GParquetArrowFileReader *reader); -GARROW_AVAILABLE_IN_6_0 +GPARQUET_AVAILABLE_IN_6_0 gint64 gparquet_arrow_file_reader_get_n_rows(GParquetArrowFileReader *reader); +GPARQUET_AVAILABLE_IN_0_11 void gparquet_arrow_file_reader_set_use_threads(GParquetArrowFileReader *reader, gboolean use_threads); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetFileMetadata * gparquet_arrow_file_reader_get_metadata(GParquetArrowFileReader *reader); diff --git a/c_glib/parquet-glib/arrow-file-writer.h b/c_glib/parquet-glib/arrow-file-writer.h index 592ea4ae3f1ba..71cbfa195e842 100644 --- a/c_glib/parquet-glib/arrow-file-writer.h +++ b/c_glib/parquet-glib/arrow-file-writer.h @@ -20,10 +20,12 @@ #pragma once #include +#include G_BEGIN_DECLS #define GPARQUET_TYPE_WRITER_PROPERTIES (gparquet_writer_properties_get_type()) +GPARQUET_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GParquetWriterProperties, gparquet_writer_properties, GPARQUET, @@ -34,61 +36,62 @@ struct _GParquetWriterPropertiesClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 GParquetWriterProperties * gparquet_writer_properties_new(void); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_compression(GParquetWriterProperties *properties, GArrowCompressionType compression_type, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 GArrowCompressionType gparquet_writer_properties_get_compression_path(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_enable_dictionary(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_disable_dictionary(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gboolean gparquet_writer_properties_is_dictionary_enabled(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_dictionary_page_size_limit( GParquetWriterProperties *properties, gint64 limit); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 gparquet_writer_properties_get_dictionary_page_size_limit( GParquetWriterProperties *properties); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_batch_size(GParquetWriterProperties *properties, gint64 batch_size); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 gparquet_writer_properties_get_batch_size(GParquetWriterProperties *properties); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_max_row_group_length(GParquetWriterProperties *properties, gint64 length); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 gparquet_writer_properties_get_max_row_group_length(GParquetWriterProperties *properties); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void 
gparquet_writer_properties_set_data_page_size(GParquetWriterProperties *properties, gint64 data_page_size); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 gparquet_writer_properties_get_data_page_size(GParquetWriterProperties *properties); #define GPARQUET_TYPE_ARROW_FILE_WRITER (gparquet_arrow_file_writer_get_type()) +GPARQUET_AVAILABLE_IN_0_11 G_DECLARE_DERIVABLE_TYPE(GParquetArrowFileWriter, gparquet_arrow_file_writer, GPARQUET, @@ -99,23 +102,28 @@ struct _GParquetArrowFileWriterClass GObjectClass parent_class; }; +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileWriter * gparquet_arrow_file_writer_new_arrow(GArrowSchema *schema, GArrowOutputStream *sink, GParquetWriterProperties *writer_properties, GError **error); + +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileWriter * gparquet_arrow_file_writer_new_path(GArrowSchema *schema, const gchar *path, GParquetWriterProperties *writer_properties, GError **error); +GPARQUET_AVAILABLE_IN_0_11 gboolean gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, GArrowTable *table, guint64 chunk_size, GError **error); +GPARQUET_AVAILABLE_IN_0_11 gboolean gparquet_arrow_file_writer_close(GParquetArrowFileWriter *writer, GError **error); diff --git a/c_glib/parquet-glib/meson.build b/c_glib/parquet-glib/meson.build index 67de0bf2d91fb..a3de1d0933f7f 100644 --- a/c_glib/parquet-glib/meson.build +++ b/c_glib/parquet-glib/meson.build @@ -42,10 +42,17 @@ cpp_headers = files( 'parquet-glib.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GPARQUET', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: project_name) - dependencies = [ arrow, parquet, @@ -57,6 +64,7 @@ libparquet_glib = library('parquet-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGPARQUET_COMPILATION'], soversion: so_version, version: library_version) parquet_glib = declare_dependency(link_with: libparquet_glib, diff --git a/c_glib/parquet-glib/metadata.h b/c_glib/parquet-glib/metadata.h index 1c9fce7cc778d..d79bf009751ca 100644 --- a/c_glib/parquet-glib/metadata.h +++ b/c_glib/parquet-glib/metadata.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GPARQUET_TYPE_COLUMN_CHUNK_METADATA (gparquet_column_chunk_metadata_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetColumnChunkMetadata, gparquet_column_chunk_metadata, GPARQUET, @@ -34,28 +35,29 @@ struct _GParquetColumnChunkMetadataClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_column_chunk_metadata_equal(GParquetColumnChunkMetadata *metadata, GParquetColumnChunkMetadata *other_metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_column_chunk_metadata_get_total_size(GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_column_chunk_metadata_get_total_compressed_size( GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_column_chunk_metadata_get_file_offset(GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_column_chunk_metadata_can_decompress(GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetStatistics * 
gparquet_column_chunk_metadata_get_statistics(GParquetColumnChunkMetadata *metadata); #define GPARQUET_TYPE_ROW_GROUP_METADATA (gparquet_row_group_metadata_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetRowGroupMetadata, gparquet_row_group_metadata, GPARQUET, @@ -66,35 +68,36 @@ struct _GParquetRowGroupMetadataClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_row_group_metadata_equal(GParquetRowGroupMetadata *metadata, GParquetRowGroupMetadata *other_metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_row_group_metadata_get_n_columns(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetColumnChunkMetadata * gparquet_row_group_metadata_get_column_chunk(GParquetRowGroupMetadata *metadata, gint index, GError **error); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_n_rows(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_total_size(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_total_compressed_size(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_file_offset(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_row_group_metadata_can_decompress(GParquetRowGroupMetadata *metadata); #define GPARQUET_TYPE_FILE_METADATA (gparquet_file_metadata_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE( GParquetFileMetadata, gparquet_file_metadata, GPARQUET, FILE_METADATA, GObject) struct _GParquetFileMetadataClass @@ -102,34 +105,34 @@ struct _GParquetFileMetadataClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_file_metadata_equal(GParquetFileMetadata *metadata, GParquetFileMetadata *other_metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_file_metadata_get_n_columns(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_file_metadata_get_n_schema_elements(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_file_metadata_get_n_rows(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_file_metadata_get_n_row_groups(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetRowGroupMetadata * gparquet_file_metadata_get_row_group(GParquetFileMetadata *metadata, gint index, GError **error); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 const gchar * gparquet_file_metadata_get_created_by(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 guint32 gparquet_file_metadata_get_size(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_file_metadata_can_decompress(GParquetFileMetadata *metadata); diff --git a/c_glib/parquet-glib/parquet-glib.h b/c_glib/parquet-glib/parquet-glib.h index 23659421ce3d8..308adb87a7ed2 100644 --- a/c_glib/parquet-glib/parquet-glib.h +++ b/c_glib/parquet-glib/parquet-glib.h @@ -19,6 +19,8 @@ #pragma once +#include + #include #include #include diff --git a/c_glib/parquet-glib/statistics.h b/c_glib/parquet-glib/statistics.h index f28e2a3713638..25e02df8774b2 
100644 --- a/c_glib/parquet-glib/statistics.h +++ b/c_glib/parquet-glib/statistics.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GPARQUET_TYPE_STATISTICS (gparquet_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE( GParquetStatistics, gparquet_statistics, GPARQUET, STATISTICS, GObject) struct _GParquetStatisticsClass @@ -31,30 +34,31 @@ struct _GParquetStatisticsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_equal(GParquetStatistics *statistics, GParquetStatistics *other_statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_has_n_nulls(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_statistics_get_n_nulls(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_has_n_distinct_values(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_statistics_get_n_distinct_values(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_statistics_get_n_values(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_has_min_max(GParquetStatistics *statistics); #define GPARQUET_TYPE_BOOLEAN_STATISTICS (gparquet_boolean_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetBooleanStatistics, gparquet_boolean_statistics, GPARQUET, @@ -65,14 +69,15 @@ struct _GParquetBooleanStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_boolean_statistics_get_min(GParquetBooleanStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_boolean_statistics_get_max(GParquetBooleanStatistics *statistics); #define GPARQUET_TYPE_INT32_STATISTICS (gparquet_int32_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetInt32Statistics, gparquet_int32_statistics, GPARQUET, @@ -83,14 +88,15 @@ struct _GParquetInt32StatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint32 gparquet_int32_statistics_get_min(GParquetInt32Statistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint32 gparquet_int32_statistics_get_max(GParquetInt32Statistics *statistics); #define GPARQUET_TYPE_INT64_STATISTICS (gparquet_int64_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetInt64Statistics, gparquet_int64_statistics, GPARQUET, @@ -101,14 +107,15 @@ struct _GParquetInt64StatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_int64_statistics_get_min(GParquetInt64Statistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_int64_statistics_get_max(GParquetInt64Statistics *statistics); #define GPARQUET_TYPE_FLOAT_STATISTICS (gparquet_float_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetFloatStatistics, gparquet_float_statistics, GPARQUET, @@ -119,14 +126,15 @@ struct _GParquetFloatStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gfloat gparquet_float_statistics_get_min(GParquetFloatStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gfloat 
gparquet_float_statistics_get_max(GParquetFloatStatistics *statistics); #define GPARQUET_TYPE_DOUBLE_STATISTICS (gparquet_double_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetDoubleStatistics, gparquet_double_statistics, GPARQUET, @@ -137,14 +145,15 @@ struct _GParquetDoubleStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gdouble gparquet_double_statistics_get_min(GParquetDoubleStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gdouble gparquet_double_statistics_get_max(GParquetDoubleStatistics *statistics); #define GPARQUET_TYPE_BYTE_ARRAY_STATISTICS (gparquet_byte_array_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetByteArrayStatistics, gparquet_byte_array_statistics, GPARQUET, @@ -155,15 +164,16 @@ struct _GParquetByteArrayStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_byte_array_statistics_get_min(GParquetByteArrayStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_byte_array_statistics_get_max(GParquetByteArrayStatistics *statistics); #define GPARQUET_TYPE_FIXED_LENGTH_BYTE_ARRAY_STATISTICS \ (gparquet_fixed_length_byte_array_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetFixedLengthByteArrayStatistics, gparquet_fixed_length_byte_array_statistics, GPARQUET, @@ -174,11 +184,11 @@ struct _GParquetFixedLengthByteArrayStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_fixed_length_byte_array_statistics_get_min( GParquetFixedLengthByteArrayStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_fixed_length_byte_array_statistics_get_max( GParquetFixedLengthByteArrayStatistics *statistics); diff --git a/c_glib/parquet-glib/version.h.in b/c_glib/parquet-glib/version.h.in new file mode 100644 index 0000000000000..142b3b83e0f3d --- /dev/null +++ b/c_glib/parquet-glib/version.h.in @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/version.h> + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: parquet-glib/parquet-glib.h + * + * Parquet GLib provides macros that can be used by the C pre-processor. + * They are useful to check version-related things at compile time. + */ + +/** + * GPARQUET_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GPARQUET_VERSION_MINOR: + * + * The minor version.
+ * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GPARQUET_VERSION_MICRO: + * + * The micro version. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GPARQUET_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot versions. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_TAG "@VERSION_TAG@" + +/** + * GPARQUET_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in the C pre-processor. + * + * Returns: %TRUE if the compile time Apache Parquet GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_CHECK(major, minor, micro) \ + (GPARQUET_VERSION_MAJOR > (major) || \ + (GPARQUET_VERSION_MAJOR == (major) && \ + GPARQUET_VERSION_MINOR > (minor)) || \ + (GPARQUET_VERSION_MAJOR == (major) && \ + GPARQUET_VERSION_MINOR == (minor) && \ + GPARQUET_VERSION_MICRO >= (micro))) + +/** + * GPARQUET_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecation warnings are produced. + * + * You must define this macro before including the + * parquet-glib/parquet-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GPARQUET_DISABLE_DEPRECATION_WARNINGS +# define GPARQUET_DEPRECATED +# define GPARQUET_DEPRECATED_FOR(function) +# define GPARQUET_UNAVAILABLE(major, minor) +#else +# define GPARQUET_DEPRECATED G_DEPRECATED +# define GPARQUET_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GPARQUET_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GPARQUET_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version checks. + * + * This macro value must be one of the predefined version macros such + * as %GPARQUET_VERSION_0_10. + * + * If you use any functions that were deprecated in or before + * %GPARQUET_VERSION_MIN_REQUIRED, deprecation warnings are produced at + * compile time. + * + * You must define this macro before including the + * parquet-glib/parquet-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GPARQUET_VERSION_MIN_REQUIRED +# define GPARQUET_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GPARQUET_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version checks. + * + * This macro value must be one of the predefined version macros such + * as %GPARQUET_VERSION_0_10. + * + * If you use any functions that were introduced in a version newer + * than %GPARQUET_VERSION_MAX_ALLOWED, not-available warnings are + * produced at compile time. + * + * You must define this macro before including the + * parquet-glib/parquet-glib.h header.
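+ * For example (an editor's illustration, not part of the upstream
+ * patch; %GPARQUET_VERSION_8_0 is assumed to be among the macros
+ * generated into @ENCODED_VERSIONS@):
+ *
+ * |[<!-- language="C" -->
+ * #define GPARQUET_VERSION_MAX_ALLOWED GPARQUET_VERSION_8_0
+ * #include <parquet-glib/parquet-glib.h>
+ * ]|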
+ * + * Since: 17.0.0 + */ +#ifndef GPARQUET_VERSION_MAX_ALLOWED +# define GPARQUET_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@VISIBILITY_MACROS@ + +@AVAILABILITY_MACROS@ diff --git a/c_glib/test/dataset/test-file-system-dataset.rb b/c_glib/test/dataset/test-file-system-dataset.rb index 0e856b678f860..96deedf6b4eb0 100644 --- a/c_glib/test/dataset/test-file-system-dataset.rb +++ b/c_glib/test/dataset/test-file-system-dataset.rb @@ -56,6 +56,22 @@ def test_partitioning end def test_read_write + dataset, expected_table = create_dataset + assert_equal(expected_table, dataset.to_table) + end + + def test_to_record_batch_reader + dataset, expected_table = create_dataset + reader = dataset.to_record_batch_reader + begin + assert_equal(expected_table, reader.read_all) + ensure + # Unref to ensure the reader closes files and we can delete the temp directory + reader.unref + end + end + + def create_dataset table = build_table(label: build_string_array(["a", "a", "b", "c"]), count: build_int32_array([1, 10, 2, 3])) table_reader = Arrow::TableBatchReader.new(table) @@ -73,7 +89,8 @@ def test_read_write end @factory.partition_base_dir = @dir dataset = @factory.finish - assert_equal(build_table(count: [ + + expected_table = build_table(count: [ build_int32_array([1, 10]), build_int32_array([2]), build_int32_array([3]), @@ -82,7 +99,8 @@ def test_read_write build_string_array(["a", "a"]), build_string_array(["b"]), build_string_array(["c"]), - ]), - dataset.to_table) + ]) + + return dataset, expected_table end end diff --git a/c_glib/test/dataset/test-scanner.rb b/c_glib/test/dataset/test-scanner.rb index f7702d4905fb6..5dc31eefc5f4c 100644 --- a/c_glib/test/dataset/test-scanner.rb +++ b/c_glib/test/dataset/test-scanner.rb @@ -45,4 +45,14 @@ def setup def test_to_table assert_equal(@table, @scanner.to_table) end + + def test_to_record_batch_reader + reader = @scanner.to_record_batch_reader + begin + assert_equal(@table, reader.read_all) + ensure + # Unref to ensure the reader closes files and we can delete the temp directory + reader.unref + end + end end diff --git a/c_glib/test/parquet/test-arrow-file-reader.rb b/c_glib/test/parquet/test-arrow-file-reader.rb index 45eb335965434..eff5ad966aea6 100644 --- a/c_glib/test/parquet/test-arrow-file-reader.rb +++ b/c_glib/test/parquet/test-arrow-file-reader.rb @@ -20,16 +20,23 @@ class TestParquetArrowFileReader < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @a_array = build_string_array(["foo", "bar"]) - @b_array = build_int32_array([123, 456]) - @table = build_table("a" => @a_array, - "b" => @b_array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1 - writer.write_table(@table, chunk_size) - writer.close - @reader = Parquet::ArrowFileReader.new(@file.path) + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @a_array = build_string_array(["foo", "bar"]) + @b_array = build_int32_array([123, 456]) + @table = build_table("a" => @a_array, + "b" => @b_array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1 + writer.write_table(@table, chunk_size) + writer.close + @reader = Parquet::ArrowFileReader.new(@file.path) + begin + yield + ensure + @reader.unref + end + end end def test_schema diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb b/c_glib/test/parquet/test-arrow-file-writer.rb index 855527444d063..f899e7273b2a2 100644 --- 
a/c_glib/test/parquet/test-arrow-file-writer.rb +++ b/c_glib/test/parquet/test-arrow-file-writer.rb @@ -20,7 +20,10 @@ class TestParquetArrowFileWriter < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + yield + end end def test_write @@ -33,14 +36,18 @@ def test_write writer.close reader = Parquet::ArrowFileReader.new(@file.path) - reader.use_threads = true - assert_equal([ - enabled_values.length / chunk_size, - true, - ], - [ - reader.n_row_groups, - table.equal_metadata(reader.read_table, false), - ]) + begin + reader.use_threads = true + assert_equal([ + enabled_values.length / chunk_size, + true, + ], + [ + reader.n_row_groups, + table.equal_metadata(reader.read_table, false), + ]) + ensure + reader.unref + end end end diff --git a/c_glib/test/parquet/test-boolean-statistics.rb b/c_glib/test/parquet/test-boolean-statistics.rb index 6131a22195cb8..244348641320e 100644 --- a/c_glib/test/parquet/test-boolean-statistics.rb +++ b/c_glib/test/parquet/test-boolean-statistics.rb @@ -20,14 +20,22 @@ class TestParquetBooleanStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("boolean" => build_boolean_array([nil, false, true])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("boolean" => build_boolean_array([nil, false, true])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-byte-array-statistics.rb b/c_glib/test/parquet/test-byte-array-statistics.rb index 50ec409dbce7c..b9693a77fff13 100644 --- a/c_glib/test/parquet/test-byte-array-statistics.rb +++ b/c_glib/test/parquet/test-byte-array-statistics.rb @@ -20,14 +20,22 @@ class TestParquetByteArrayStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("string" => build_string_array([nil, "abc", "xyz"])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("string" => build_string_array([nil, "abc", "xyz"])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-column-chunk-metadata.rb 
b/c_glib/test/parquet/test-column-chunk-metadata.rb index a93fe85bbfbf1..f0012f0124577 100644 --- a/c_glib/test/parquet/test-column-chunk-metadata.rb +++ b/c_glib/test/parquet/test-column-chunk-metadata.rb @@ -20,35 +20,46 @@ class TestParquetColumnChunkMetadata < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @string_array = build_string_array([nil, "hello"]) - fields = [ - Arrow::Field.new("int8", Arrow::Int8DataType.new), - Arrow::Field.new("boolean", Arrow::BooleanDataType.new), - ] - structs = [ - { - "int8" => -29, - "boolean" => true, - }, - nil, - ] - @struct_array = build_struct_array(fields, structs) - @table = build_table("string" => @string_array, - "struct" => @struct_array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @metadata = reader.metadata.get_row_group(0).get_column_chunk(0) + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @string_array = build_string_array([nil, "hello"]) + fields = [ + Arrow::Field.new("int8", Arrow::Int8DataType.new), + Arrow::Field.new("boolean", Arrow::BooleanDataType.new), + ] + structs = [ + { + "int8" => -29, + "boolean" => true, + }, + nil, + ] + @struct_array = build_struct_array(fields, structs) + @table = build_table("string" => @string_array, + "struct" => @struct_array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @metadata = reader.metadata.get_row_group(0).get_column_chunk(0) + yield + ensure + reader.unref + end + end end test("#==") do reader = Parquet::ArrowFileReader.new(@file.path) - other_metadata = reader.metadata.get_row_group(0).get_column_chunk(0) - assert do - @metadata == other_metadata + begin + other_metadata = reader.metadata.get_row_group(0).get_column_chunk(0) + assert do + @metadata == other_metadata + end + ensure + reader.unref end end diff --git a/c_glib/test/parquet/test-double-statistics.rb b/c_glib/test/parquet/test-double-statistics.rb index a610fb24a9bdf..6c7a95824570d 100644 --- a/c_glib/test/parquet/test-double-statistics.rb +++ b/c_glib/test/parquet/test-double-statistics.rb @@ -20,14 +20,22 @@ class TestParquetDoubleStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("double" => build_double_array([nil, -2.9, 2.9])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("double" => build_double_array([nil, -2.9, 2.9])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-file-metadata.rb b/c_glib/test/parquet/test-file-metadata.rb index 
2bca7e66e0b07..aec3f4ab829b9 100644 --- a/c_glib/test/parquet/test-file-metadata.rb +++ b/c_glib/test/parquet/test-file-metadata.rb @@ -20,35 +20,46 @@ class TestParquetFileMetadata < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @string_array = build_string_array([nil, "hello"]) - fields = [ - Arrow::Field.new("int8", Arrow::Int8DataType.new), - Arrow::Field.new("boolean", Arrow::BooleanDataType.new), - ] - structs = [ - { - "int8" => -29, - "boolean" => true, - }, - nil, - ] - @struct_array = build_struct_array(fields, structs) - @table = build_table("string" => @string_array, - "struct" => @struct_array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @metadata = reader.metadata + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @string_array = build_string_array([nil, "hello"]) + fields = [ + Arrow::Field.new("int8", Arrow::Int8DataType.new), + Arrow::Field.new("boolean", Arrow::BooleanDataType.new), + ] + structs = [ + { + "int8" => -29, + "boolean" => true, + }, + nil, + ] + @struct_array = build_struct_array(fields, structs) + @table = build_table("string" => @string_array, + "struct" => @struct_array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @metadata = reader.metadata + yield + ensure + reader.unref + end + end end test("#==") do reader = Parquet::ArrowFileReader.new(@file.path) - other_metadata = reader.metadata - assert do - @metadata == other_metadata + begin + other_metadata = reader.metadata + assert do + @metadata == other_metadata + end + ensure + reader.unref end end diff --git a/c_glib/test/parquet/test-fixed-length-byte-array-statistics.rb b/c_glib/test/parquet/test-fixed-length-byte-array-statistics.rb index 87a96d009c509..c2f179627d06a 100644 --- a/c_glib/test/parquet/test-fixed-length-byte-array-statistics.rb +++ b/c_glib/test/parquet/test-fixed-length-byte-array-statistics.rb @@ -20,16 +20,24 @@ class TestParquetFixedLengthByteArrayStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - data_type = Arrow::FixedSizeBinaryDataType.new(3) - array = build_fixed_size_binary_array(data_type, [nil, "abc", "xyz"]) - @table = build_table("binary" => array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + data_type = Arrow::FixedSizeBinaryDataType.new(3) + array = build_fixed_size_binary_array(data_type, [nil, "abc", "xyz"]) + @table = build_table("binary" => array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-float-statistics.rb 
b/c_glib/test/parquet/test-float-statistics.rb index 2622a2bb36fe6..7d1a233f53ca0 100644 --- a/c_glib/test/parquet/test-float-statistics.rb +++ b/c_glib/test/parquet/test-float-statistics.rb @@ -20,14 +20,22 @@ class TestParquetFloatStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("float" => build_float_array([nil, -2.9, 2.9])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("float" => build_float_array([nil, -2.9, 2.9])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-int32-statistics.rb b/c_glib/test/parquet/test-int32-statistics.rb index 041f07c74292f..8d41327f88014 100644 --- a/c_glib/test/parquet/test-int32-statistics.rb +++ b/c_glib/test/parquet/test-int32-statistics.rb @@ -20,14 +20,22 @@ class TestParquetInt32Statistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("int32" => build_int32_array([nil, -2, 9])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("int32" => build_int32_array([nil, -2, 9])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-int64-statistics.rb b/c_glib/test/parquet/test-int64-statistics.rb index 0a014573c1144..81fce8a0bbbbd 100644 --- a/c_glib/test/parquet/test-int64-statistics.rb +++ b/c_glib/test/parquet/test-int64-statistics.rb @@ -20,15 +20,23 @@ class TestParquetInt64Statistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - array = build_int64_array([nil, -(2 ** 32), 2 ** 32]) - @table = build_table("int64" => array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + array = build_int64_array([nil, -(2 ** 32), 2 ** 32]) + @table = build_table("int64" => array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, 
chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-row-group-metadata.rb b/c_glib/test/parquet/test-row-group-metadata.rb index e68cb9d11ee62..f238dd3b5774e 100644 --- a/c_glib/test/parquet/test-row-group-metadata.rb +++ b/c_glib/test/parquet/test-row-group-metadata.rb @@ -20,35 +20,46 @@ class TestParquetRowGroupMetadata < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @string_array = build_string_array([nil, "hello"]) - fields = [ - Arrow::Field.new("int8", Arrow::Int8DataType.new), - Arrow::Field.new("boolean", Arrow::BooleanDataType.new), - ] - structs = [ - { - "int8" => -29, - "boolean" => true, - }, - nil, - ] - @struct_array = build_struct_array(fields, structs) - @table = build_table("string" => @string_array, - "struct" => @struct_array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @metadata = reader.metadata.get_row_group(0) + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @string_array = build_string_array([nil, "hello"]) + fields = [ + Arrow::Field.new("int8", Arrow::Int8DataType.new), + Arrow::Field.new("boolean", Arrow::BooleanDataType.new), + ] + structs = [ + { + "int8" => -29, + "boolean" => true, + }, + nil, + ] + @struct_array = build_struct_array(fields, structs) + @table = build_table("string" => @string_array, + "struct" => @struct_array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @metadata = reader.metadata.get_row_group(0) + yield + ensure + reader.unref + end + end end test("#==") do reader = Parquet::ArrowFileReader.new(@file.path) - other_metadata = reader.metadata.get_row_group(0) - assert do - @metadata == other_metadata + begin + other_metadata = reader.metadata.get_row_group(0) + assert do + @metadata == other_metadata + end + ensure + reader.unref end end diff --git a/c_glib/test/parquet/test-statistics.rb b/c_glib/test/parquet/test-statistics.rb index 0367084c88a49..09a47ac255927 100644 --- a/c_glib/test/parquet/test-statistics.rb +++ b/c_glib/test/parquet/test-statistics.rb @@ -20,22 +20,34 @@ class TestParquetStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("int32" => build_int32_array([nil, 2, 2, 9])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("int32" => build_int32_array([nil, 2, 2, 9])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + 
end + end end test("#==") do reader = Parquet::ArrowFileReader.new(@file.path) - other_statistics = - reader.metadata.get_row_group(0).get_column_chunk(0).statistics - assert do - @statistics == other_statistics + begin + other_statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + assert do + @statistics == other_statistics + end + ensure + reader.unref end end diff --git a/c_glib/tool/generate-version-header.py b/c_glib/tool/generate-version-header.py new file mode 100755 index 0000000000000..f2fc26132c143 --- /dev/null +++ b/c_glib/tool/generate-version-header.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +import argparse +from io import TextIOBase +from pathlib import Path +import re + + +def main(): + parser = argparse.ArgumentParser( + description="Generate C header with version macros") + parser.add_argument( + "--library", + required=True, + help="The library name to use in macro prefixes") + parser.add_argument( + "--version", + required=True, + help="The library version number") + parser.add_argument( + "--input", + type=Path, + required=True, + help="Path to the input template file") + parser.add_argument( + "--output", + type=Path, + required=True, + help="Path to the output file to generate") + + args = parser.parse_args() + + with open(args.input, "r", encoding="utf-8") as input_file, \ + open(args.output, "w", encoding="utf-8") as output_file: + write_header( + input_file, output_file, args.library, args.version) + + +def write_header( + input_file: TextIOBase, + output_file: TextIOBase, + library_name: str, + version: str): + if "-" in version: + version, version_tag = version.split("-") + else: + version_tag = "" + version_major, version_minor, version_micro = [int(v) for v in version.split(".")] + + encoded_versions = generate_encoded_versions(library_name) + visibility_macros = generate_visibility_macros(library_name) + availability_macros = generate_availability_macros(library_name) + + replacements = { + "VERSION_MAJOR": str(version_major), + "VERSION_MINOR": str(version_minor), + "VERSION_MICRO": str(version_micro), + "VERSION_TAG": version_tag, + "ENCODED_VERSIONS": encoded_versions, + "VISIBILITY_MACROS": visibility_macros, + "AVAILABILITY_MACROS": availability_macros, + } + + output_file.write(re.sub( + r"@([A-Z_]+)@", lambda match: replacements[match[1]], input_file.read())) + + +def generate_visibility_macros(library: str) -> str: + return f"""#if (defined(_WIN32) || defined(__CYGWIN__)) && defined(_MSVC_LANG) && \ + !defined({library}_STATIC_COMPILATION) +# define {library}_EXPORT __declspec(dllexport) +# define {library}_IMPORT __declspec(dllimport) +#else +# define {library}_EXPORT +# define {library}_IMPORT +#endif + +#ifdef {library}_COMPILATION +# 
define {library}_API {library}_EXPORT +#else +# define {library}_API {library}_IMPORT +#endif + +#define {library}_EXTERN {library}_API extern""" + + +def generate_encoded_versions(library: str) -> str: + macros = [] + + for major_version, minor_version in ALL_VERSIONS: + macros.append(f"""/** + * {library}_VERSION_{major_version}_{minor_version}: + * + * You can use this macro value for compile time API version check. + * + * Since: {major_version}.{minor_version}.0 + */ +#define {library}_VERSION_{major_version}_{minor_version} G_ENCODE_VERSION({major_version}, {minor_version})""") # noqa: E501 + + return "\n\n".join(macros) + + +def generate_availability_macros(library: str) -> str: + macros = [f"""#define {library}_AVAILABLE_IN_ALL {library}_EXTERN"""] + + for major_version, minor_version in ALL_VERSIONS: + macros.append(f"""#if {library}_VERSION_MIN_REQUIRED >= {library}_VERSION_{major_version}_{minor_version} +# define {library}_DEPRECATED_IN_{major_version}_{minor_version} {library}_DEPRECATED +# define {library}_DEPRECATED_IN_{major_version}_{minor_version}_FOR(function) {library}_DEPRECATED_FOR(function) +#else +# define {library}_DEPRECATED_IN_{major_version}_{minor_version} +# define {library}_DEPRECATED_IN_{major_version}_{minor_version}_FOR(function) +#endif + +#if {library}_VERSION_MAX_ALLOWED < {library}_VERSION_{major_version}_{minor_version} +# define {library}_AVAILABLE_IN_{major_version}_{minor_version} {library}_EXTERN {library}_UNAVAILABLE({major_version}, {minor_version}) +#else +# define {library}_AVAILABLE_IN_{major_version}_{minor_version} {library}_EXTERN +#endif""") # noqa: E501 + + return "\n\n".join(macros) + + +ALL_VERSIONS = [ + (17, 0), + (16, 0), + (15, 0), + (14, 0), + (13, 0), + (12, 0), + (11, 0), + (10, 0), + (9, 0), + (8, 0), + (7, 0), + (6, 0), + (5, 0), + (4, 0), + (3, 0), + (2, 0), + (1, 0), + (0, 17), + (0, 16), + (0, 15), + (0, 14), + (0, 13), + (0, 12), + (0, 11), + (0, 10), +] + + +if __name__ == '__main__': + main() diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json new file mode 100644 index 0000000000000..4a14a1e437ff6 --- /dev/null +++ b/c_glib/vcpkg.json @@ -0,0 +1,8 @@ +{ + "name": "arrow-glib", + "version-string": "17.0.0-SNAPSHOT", + "dependencies": [ + "glib", + "pkgconf" + ] +} diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index 8cfa67c437264..f688fbb63a9ad 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -129,7 +129,6 @@ set PYARROW_WITH_ORC=%ARROW_ORC% set PYARROW_WITH_PARQUET=ON set PYARROW_WITH_PARQUET_ENCRYPTION=ON set PYARROW_WITH_S3=%ARROW_S3% -set PYARROW_WITH_STATIC_BOOST=ON set PYARROW_WITH_SUBSTRAIT=ON set ARROW_HOME=%CONDA_PREFIX%\Library diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat index 5c4a11832d5ee..5a9dffa166fb7 100644 --- a/ci/appveyor-cpp-setup.bat +++ b/ci/appveyor-cpp-setup.bat @@ -66,6 +66,9 @@ set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt @rem Force conda to use conda-forge conda config --add channels conda-forge conda config --remove channels defaults +@rem Ensure using the latest information. If there are invalid caches, +@rem mamba may use invalid download URL. 
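To make the template mechanics of the new `generate-version-header.py` concrete, here is a hedged usage sketch with the script's definitions (`write_header`, `ALL_VERSIONS`, and the `generate_*` helpers above) in scope. The template text here is invented for illustration; the real templates are the header templates passed via `--input`, whose exact contents this diff does not show.

```python
import io

# A toy template; real inputs live in the c_glib tree (illustrative only).
template = """#pragma once
#define GARROW_MAJOR_VERSION @VERSION_MAJOR@
#define GARROW_MINOR_VERSION @VERSION_MINOR@
#define GARROW_MICRO_VERSION @VERSION_MICRO@
@VISIBILITY_MACROS@
@ENCODED_VERSIONS@
@AVAILABILITY_MACROS@
"""

out = io.StringIO()
# "17.0.0-SNAPSHOT" splits into (17, 0, 0) plus the "SNAPSHOT" tag.
write_header(io.StringIO(template), out, "GARROW", "17.0.0-SNAPSHOT")
header = out.getvalue()
assert "#define GARROW_MAJOR_VERSION 17" in header
assert "G_ENCODE_VERSION(17, 0)" in header
```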
+mamba clean --all -y @rem Arrow conda environment mamba create -n arrow -y -c conda-forge ^ --file=ci\conda_env_python.txt ^ diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 52e456eaab0cc..f28a24cac8d2d 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -21,7 +21,7 @@ azure-identity-cpp>=1.6.0 azure-storage-blobs-cpp>=12.10.0 azure-storage-common-cpp>=12.5.0 azure-storage-files-datalake-cpp>=12.9.0 -benchmark>=1.6.0 +benchmark>=1.6.0,!=1.8.4 boost-cpp>=1.68.0 brotli bzip2 diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index 4366e30010389..bf915493de302 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -26,5 +26,5 @@ numpy>=1.16.6 pytest pytest-faulthandler s3fs>=2023.10.0 -setuptools -setuptools_scm +setuptools>=64 +setuptools_scm>=8 diff --git a/ci/conda_env_sphinx.txt b/ci/conda_env_sphinx.txt index 0a356d5722c42..4665a32e24bbe 100644 --- a/ci/conda_env_sphinx.txt +++ b/ci/conda_env_sphinx.txt @@ -26,7 +26,9 @@ pydata-sphinx-theme=0.14 sphinx-autobuild sphinx-design sphinx-copybutton +sphinx-lint sphinxcontrib-jquery +sphinxcontrib-mermaid sphinx==6.2 # Requirement for doctest-cython # Needs upper pin of 0.3.0, see: diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index a747ccbc7262f..30b9cd5199fab 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -56,7 +56,7 @@ RUN wget -nv -O - https://dl.google.com/go/go${go}.linux-${arch}.tar.gz | tar -x ENV DOTNET_ROOT=/opt/dotnet \ PATH=/opt/dotnet:$PATH -RUN curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin -Channel 7.0 -InstallDir /opt/dotnet +RUN curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin -Channel 8.0 -InstallDir /opt/dotnet ENV ARROW_ACERO=OFF \ ARROW_AZURE=OFF \ diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index 7036ddf27d52a..d7a6f9df2c2ee 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -119,7 +119,6 @@ ENV ARROW_ACERO=ON \ ARROW_GANDIVA=ON \ ARROW_GCS=ON \ ARROW_HOME=/usr/local \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 76b5ae6f14363..1c916840e071b 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -18,21 +18,37 @@ ARG base FROM ${base} -ARG r=4.2 +ARG r=4.4 ARG jdk=8 -# See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu/ +ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium + +# See R install instructions at https://cloud.r-project.org/bin/linux/ RUN apt-get update -y && \ apt-get install -y \ - dirmngr \ apt-transport-https \ - software-properties-common && \ - wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \ - tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc && \ - add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran40/' && \ + dirmngr \ + gpg \ + lsb-release && \ + gpg --keyserver keyserver.ubuntu.com \ + --recv-key 95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7 && \ + gpg --export 95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7 | \ + gpg --no-default-keyring \ + --keyring /usr/share/keyrings/cran.gpg \ + --import - && \ + echo "deb [signed-by=/usr/share/keyrings/cran.gpg] https://cloud.r-project.org/bin/linux/$(lsb_release -is | tr 'A-Z' 'a-z') $(lsb_release -cs)-cran40/" | \ + tee /etc/apt/sources.list.d/cran.list 
&& \ + if [ -f /etc/apt/sources.list.d/debian.sources ]; then \ + sed -i \ + -e 's/main$/main contrib non-free non-free-firmware/g' \ + /etc/apt/sources.list.d/debian.sources; \ + fi && \ + apt-get update -y && \ apt-get install -y --no-install-recommends \ autoconf-archive \ automake \ + chromium \ + chromium-sandbox \ curl \ doxygen \ gi-docgen \ @@ -48,6 +64,8 @@ RUN apt-get update -y && \ libxml2-dev \ meson \ ninja-build \ + nodejs \ + npm \ nvidia-cuda-toolkit \ openjdk-${jdk}-jdk-headless \ pandoc \ @@ -55,9 +73,12 @@ RUN apt-get update -y && \ r-base=${r}* \ rsync \ ruby-dev \ + sudo \ wget && \ apt-get clean && \ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* && \ + PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \ + npm install -g yarn @mermaid-js/mermaid-cli ENV JAVA_HOME=/usr/lib/jvm/java-${jdk}-openjdk-amd64 @@ -68,20 +89,6 @@ RUN /arrow/ci/scripts/util_download_apache.sh \ ENV PATH=/opt/apache-maven-${maven}/bin:$PATH RUN mvn -version -ARG node=16 -RUN apt-get purge -y npm && \ - apt-get autoremove -y --purge && \ - wget -q -O - https://deb.nodesource.com/setup_${node}.x | bash - && \ - apt-get install -y nodejs && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - npm install -g yarn - -COPY docs/requirements.txt /arrow/docs/ -RUN python3 -m venv ${ARROW_PYTHON_VENV} && \ - . ${ARROW_PYTHON_VENV}/bin/activate && \ - pip install -r arrow/docs/requirements.txt - COPY c_glib/Gemfile /arrow/c_glib/ RUN gem install --no-document bundler && \ bundle install --gemfile /arrow/c_glib/Gemfile @@ -98,6 +105,17 @@ COPY r/DESCRIPTION /arrow/r/ RUN /arrow/ci/scripts/r_deps.sh /arrow && \ R -e "install.packages('pkgdown')" +RUN useradd --user-group --create-home --groups audio,video arrow +RUN echo "arrow ALL=(ALL:ALL) NOPASSWD:ALL" | \ + EDITOR=tee visudo -f /etc/sudoers.d/arrow +USER arrow + +COPY docs/requirements.txt /arrow/docs/ +RUN sudo chown -R arrow: ${ARROW_PYTHON_VENV} && \ + python3 -m venv ${ARROW_PYTHON_VENV} && \ + . 
${ARROW_PYTHON_VENV}/bin/activate && \ + pip install -r arrow/docs/requirements.txt + ENV ARROW_ACERO=ON \ ARROW_AZURE=OFF \ ARROW_BUILD_STATIC=OFF \ diff --git a/ci/docker/linux-apt-lint.dockerfile b/ci/docker/linux-apt-lint.dockerfile index 2b94a48871847..9ec80440a3c21 100644 --- a/ci/docker/linux-apt-lint.dockerfile +++ b/ci/docker/linux-apt-lint.dockerfile @@ -40,7 +40,7 @@ RUN apt-get update && \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -ARG r=4.2 +ARG r=4.4 RUN wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \ tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc && \ # NOTE: Only R >= 4.0 is available in this repo diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index d93732abb0032..630b96e1007b9 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -35,7 +35,7 @@ ENV LANG=C.UTF-8 # Build R # [1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04 # [2] https://linuxize.com/post/how-to-install-r-on-ubuntu-18-04/#installing-r-packages-from-cran -ARG r=3.6 +ARG r=4.4 RUN apt-get update -y && \ apt-get install -y \ dirmngr \ @@ -113,7 +113,6 @@ ENV \ ARROW_GANDIVA=OFF \ ARROW_HDFS=OFF \ ARROW_JSON=ON \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=OFF \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile index d368a6629c587..7b7e989adc0d1 100644 --- a/ci/docker/linux-r.dockerfile +++ b/ci/docker/linux-r.dockerfile @@ -27,9 +27,6 @@ ENV R_BIN=${r_bin} ARG r_dev=FALSE ENV ARROW_R_DEV=${r_dev} -ARG devtoolset_version= -ENV DEVTOOLSET_VERSION=${devtoolset_version} - ARG r_prune_deps=FALSE ENV R_PRUNE_DEPS=${r_prune_deps} diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile index ae2ba9421cd55..e17c0306f115d 100644 --- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile @@ -85,7 +85,6 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 124256378b287..d78c7a99cf4d6 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -158,7 +158,6 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index dd887a6d00ceb..341d8a87e8661 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -85,7 +85,6 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index eb189841cd344..f12e7456add8e 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -196,7 +196,6 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-22.04-csharp.dockerfile b/ci/docker/ubuntu-22.04-csharp.dockerfile index 
aebbd8fab74e9..4d77ba060b877 100644 --- a/ci/docker/ubuntu-22.04-csharp.dockerfile +++ b/ci/docker/ubuntu-22.04-csharp.dockerfile @@ -16,7 +16,7 @@ # under the License. ARG arch=amd64 -ARG dotnet=7.0 +ARG dotnet=8.0 ARG platform=jammy FROM mcr.microsoft.com/dotnet/sdk:${dotnet}-${platform}-${arch} diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 4a37818f94396..ecfb5e2f5096d 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -178,7 +178,6 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-swift.dockerfile b/ci/docker/ubuntu-swift.dockerfile index 4789c9188c226..26950b806d1bc 100644 --- a/ci/docker/ubuntu-swift.dockerfile +++ b/ci/docker/ubuntu-swift.dockerfile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -FROM swift:5.7.3 +FROM swift:5.9.0 # Go is needed for generating test data RUN apt-get update -y -q && \ diff --git a/ci/etc/valgrind-cran.supp b/ci/etc/valgrind-cran.supp index 4d29220260823..e93c2a3465f79 100644 --- a/ci/etc/valgrind-cran.supp +++ b/ci/etc/valgrind-cran.supp @@ -16,7 +16,7 @@ # under the License. { - # `testthat::skip()`s cause a valgrind error that does not show up on CRAN. + # `testthat::skip()`s cause a valgrind error that does not show up on CRAN. Memcheck:Cond fun:gregexpr_Regexc @@ -32,3 +32,21 @@ fun:getvar fun:bcEval } +{ + # This also doesn't seem to cause issues on CRAN, so suppress it. + + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + fun:libdeflate_alloc_compressor + fun:do_memCompress + fun:bcEval_loop + fun:bcEval + fun:Rf_eval + fun:R_execClosure + fun:applyClosure_core + fun:Rf_applyClosure + fun:Rf_eval + fun:do_set + fun:Rf_eval +} diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index b0905886dd50f..f6bbc78be710e 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=16.0.0.9000 +pkgver=16.1.0.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/ci/scripts/c_glib_build.sh b/ci/scripts/c_glib_build.sh index c4d2c4fdb5617..ee01bb220710e 100755 --- a/ci/scripts/c_glib_build.sh +++ b/ci/scripts/c_glib_build.sh @@ -28,17 +28,35 @@ build_root=${2} : ${BUILD_DOCS_C_GLIB:=OFF} with_doc=$([ "${BUILD_DOCS_C_GLIB}" == "ON" ] && echo "true" || echo "false") -export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig -export CFLAGS="-DARROW_NO_DEPRECATED_API" -export CXXFLAGS="-DARROW_NO_DEPRECATED_API" +if [ -n "${MSYSTEM:-}" ]; then + # Fix ARROW_HOME when running under MSYS2 + export ARROW_HOME="$(cygpath --unix "${ARROW_HOME}")" +fi + +meson_pkg_config_path="${ARROW_HOME}/lib/pkgconfig" mkdir -p ${build_dir} +if [ -n "${VCPKG_ROOT:-}" ]; then + vcpkg_install_root="${build_root}/vcpkg_installed" + $VCPKG_ROOT/vcpkg install --x-manifest-root=${source_dir} --x-install-root=${vcpkg_install_root} + export PKG_CONFIG="${vcpkg_install_root}/x64-windows/tools/pkgconf/pkgconf.exe" + meson_pkg_config_path="${vcpkg_install_root}/x64-windows/lib/pkgconfig:${meson_pkg_config_path}" +fi + +if [ -n "${VCToolsInstallDir:-}" -a -n "${MSYSTEM:-}" ]; then + # Meson finds the gnu link.exe instead of MSVC link.exe when running in MSYS2/git bash, + # so we need to make sure 
the MSVC link.exe is first in $PATH + export PATH="$(cygpath --unix "${VCToolsInstallDir}")/bin/HostX64/x64:${PATH}" +fi + # Build with Meson meson setup \ + --backend=ninja \ --prefix=$ARROW_HOME \ --libdir=lib \ + --pkg-config-path="${meson_pkg_config_path}" \ -Ddoc=${with_doc} \ -Dvapi=${ARROW_GLIB_VAPI} \ -Dwerror=${ARROW_GLIB_WERROR} \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index e28ceae8801f0..3ee7fbd9d19cd 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -120,6 +120,7 @@ else -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \ -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \ + -DARROW_BUILD_OPENMP_BENCHMARKS=${ARROW_BUILD_OPENMP_BENCHMARKS:-OFF} \ -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ @@ -136,6 +137,7 @@ else -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \ -DARROW_DATASET=${ARROW_DATASET:-OFF} \ -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ + -DARROW_DEPENDENCY_USE_SHARED=${ARROW_DEPENDENCY_USE_SHARED:-ON} \ -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \ -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \ -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \ @@ -152,7 +154,6 @@ else -DARROW_JSON=${ARROW_JSON:-ON} \ -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \ -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \ - -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \ -DARROW_ORC=${ARROW_ORC:-OFF} \ -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ @@ -229,12 +230,17 @@ find . -name "*.o" -delete popd if [ -x "$(command -v ldconfig)" ]; then - ldconfig ${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib} + if [ -x "$(command -v sudo)" ]; then + SUDO=sudo + else + SUDO= + fi + ${SUDO} ldconfig ${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib} fi if [ "${ARROW_USE_CCACHE}" == "ON" ]; then - echo -e "===\n=== ccache statistics after build\n===" - ccache -sv 2>/dev/null || ccache -s + echo -e "===\n=== ccache statistics after build\n===" + ccache -sv 2>/dev/null || ccache -s fi if command -v sccache &> /dev/null; then @@ -244,6 +250,6 @@ fi if [ "${BUILD_DOCS_CPP}" == "ON" ]; then pushd ${source_dir}/apidoc - doxygen + OUTPUT_DIRECTORY=${build_dir}/apidoc doxygen popd fi diff --git a/ci/scripts/install_azurite.sh b/ci/scripts/install_azurite.sh index 2e7008360fdc3..dda5e99405b7f 100755 --- a/ci/scripts/install_azurite.sh +++ b/ci/scripts/install_azurite.sh @@ -19,17 +19,18 @@ set -e +# Pin azurite to 3.29.0 due to https://github.com/apache/arrow/issues/41505 case "$(uname)" in Darwin) - npm install -g azurite + npm install -g azurite@v3.29.0 which azurite ;; MINGW*) choco install nodejs.install - npm install -g azurite + npm install -g azurite@v3.29.0 ;; Linux) - npm install -g azurite + npm install -g azurite@v3.29.0 which azurite ;; esac diff --git a/ci/scripts/install_vcpkg.sh b/ci/scripts/install_vcpkg.sh index cc80582326ec5..08989d6444827 100755 --- a/ci/scripts/install_vcpkg.sh +++ b/ci/scripts/install_vcpkg.sh @@ -25,13 +25,16 @@ if [ "$#" -lt 1 ]; then fi arrow_dir=$(cd -- "$(dirname -- "$0")/../.."
&& pwd -P) -default_vcpkg_version=$(cat "${arrow_dir}/.env" | grep "VCPKG" | cut -d "=" -f2 | tr -d '"') default_vcpkg_ports_patch="${arrow_dir}/ci/vcpkg/ports.patch" vcpkg_destination=$1 -vcpkg_version=${2:-$default_vcpkg_version} +vcpkg_version=${2:-} vcpkg_ports_patch=${3:-$default_vcpkg_ports_patch} +if [ -z "${vcpkg_version}" ]; then + vcpkg_version=$(source "${arrow_dir}/.env" && echo "$VCPKG") +fi + # reduce the fetched data using a shallow clone git clone --shallow-since=2021-04-01 https://github.com/microsoft/vcpkg ${vcpkg_destination} diff --git a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh index a5a012ad2c5c4..2eb58e8dc75ec 100755 --- a/ci/scripts/integration_arrow.sh +++ b/ci/scripts/integration_arrow.sh @@ -40,6 +40,8 @@ if [ "${ARROW_INTEGRATION_JAVA}" == "ON" ]; then pip install jpype1 fi +export ARROW_BUILD_ROOT=${build_dir} + # Get more detailed context on crashes export PYTHONFAULTHANDLER=1 diff --git a/ci/scripts/integration_arrow_build.sh b/ci/scripts/integration_arrow_build.sh index e5c31527aedff..9b54049a2b803 100755 --- a/ci/scripts/integration_arrow_build.sh +++ b/ci/scripts/integration_arrow_build.sh @@ -30,6 +30,8 @@ build_dir=${2} ${arrow_dir}/ci/scripts/rust_build.sh ${arrow_dir} ${build_dir} +${arrow_dir}/ci/scripts/nanoarrow_build.sh ${arrow_dir} ${build_dir} + if [ "${ARROW_INTEGRATION_CPP}" == "ON" ]; then ${arrow_dir}/ci/scripts/cpp_build.sh ${arrow_dir} ${build_dir} fi diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh index 2103f0329baec..0fa1edab429c0 100755 --- a/ci/scripts/java_build.sh +++ b/ci/scripts/java_build.sh @@ -75,7 +75,16 @@ fi # Use `2 * ncores` threads mvn="${mvn} -T 2C" -pushd ${source_dir} +# https://github.com/apache/arrow/issues/41429 +# TODO: We want to out-of-source build. This is a workaround. We copy +# all needed files to the build directory from the source directory +# and build in the build directory. 
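The GH-41429 comment above describes the copy-then-build workaround that the following hunks implement for Java (and that later hunks repeat for JS, Python, and R): mirror the needed sources into the build tree, then build there so nothing is written into the checkout. A rough Python equivalent of the Java case, assuming `mvn` is on `PATH` (the function name is hypothetical):

```python
import shutil
import subprocess
from pathlib import Path

def build_java_out_of_source(arrow_dir: Path, build_dir: Path) -> None:
    # Mirror format/ and java/ into the build tree, dereferencing symlinks
    # like `cp -aL`, so Maven never writes into the source checkout.
    build_dir.mkdir(parents=True, exist_ok=True)
    for sub in ("format", "java"):
        dest = build_dir / sub
        shutil.rmtree(dest, ignore_errors=True)
        shutil.copytree(arrow_dir / sub, dest, symlinks=False)
    # Build in the copied tree; `-T 2C` matches the script's 2-threads-per-core.
    subprocess.run(["mvn", "-T", "2C", "clean", "install"],
                   cwd=build_dir / "java", check=True)
```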
+mkdir -p ${build_dir} +rm -rf ${build_dir}/format +cp -aL ${arrow_dir}/format ${build_dir}/ +rm -rf ${build_dir}/java +cp -aL ${source_dir} ${build_dir}/ +pushd ${build_dir}/java if [ "${ARROW_JAVA_SHADE_FLATBUFFERS}" == "ON" ]; then mvn="${mvn} -Pshade-flatbuffers" @@ -95,7 +104,7 @@ if [ "${BUILD_DOCS_JAVA}" == "ON" ]; then # HTTP pooling is turned of to avoid download issues https://issues.apache.org/jira/browse/ARROW-11633 mkdir -p ${build_dir}/docs/java/reference ${mvn} -Dcheckstyle.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false clean install site - rsync -a ${arrow_dir}/java/target/site/apidocs/ ${build_dir}/docs/java/reference + rsync -a target/site/apidocs/ ${build_dir}/docs/java/reference fi popd diff --git a/ci/scripts/java_cdata_integration.sh b/ci/scripts/java_cdata_integration.sh index 86ea7cf155350..0ee5d3026aa09 100755 --- a/ci/scripts/java_cdata_integration.sh +++ b/ci/scripts/java_cdata_integration.sh @@ -20,9 +20,9 @@ set -ex arrow_dir=${1} -export ARROW_SOURCE_DIR=${arrow_dir} +build_dir=${2} -pushd ${arrow_dir}/java/c/src/test/python +pushd ${build_dir}/java/c/src/test/python python integration_tests.py diff --git a/ci/scripts/java_full_build.sh b/ci/scripts/java_full_build.sh index 2734f3e9dbec2..d914aa2d8472e 100755 --- a/ci/scripts/java_full_build.sh +++ b/ci/scripts/java_full_build.sh @@ -49,21 +49,13 @@ fi # build the entire project mvn clean \ install \ - assembly:single \ - source:jar \ - javadoc:jar \ -Papache-release \ -Parrow-c-data \ -Parrow-jni \ -Darrow.cpp.build.dir=$dist_dir \ - -Darrow.c.jni.dist.dir=$dist_dir \ - -DdescriptorId=source-release + -Darrow.c.jni.dist.dir=$dist_dir # copy all jar, zip and pom files to the distribution folder -find . \ - "(" -name "*-javadoc.jar" -o -name "*-sources.jar" ")" \ - -exec echo {} ";" \ - -exec cp {} $dist_dir ";" find ~/.m2/repository/org/apache/arrow \ "(" \ -name "*.jar" -o \ diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index da4987d307ce4..6f3769751af42 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -58,7 +58,7 @@ export ARROW_ORC : ${VCPKG_ROOT:=/opt/vcpkg} : ${VCPKG_FEATURE_FLAGS:=-manifests} : ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}} -: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-isystem;-lpthread} +: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-lpthread} if [ "${ARROW_USE_CCACHE}" == "ON" ]; then echo "=== ccache statistics before build ===" diff --git a/ci/scripts/js_build.sh b/ci/scripts/js_build.sh index d61f74f0b7ca1..196539ee0f101 100755 --- a/ci/scripts/js_build.sh +++ b/ci/scripts/js_build.sh @@ -25,7 +25,16 @@ build_dir=${2} : ${BUILD_DOCS_JS:=OFF} -pushd ${source_dir} +# https://github.com/apache/arrow/issues/41429 +# TODO: We want to out-of-source build. This is a workaround. We copy +# all needed files to the build directory from the source directory +# and build in the build directory. +rm -rf ${build_dir}/js +mkdir -p ${build_dir} +cp -aL ${arrow_dir}/LICENSE.txt ${build_dir}/ +cp -aL ${arrow_dir}/NOTICE.txt ${build_dir}/ +cp -aL ${source_dir} ${build_dir}/js +pushd ${build_dir}/js yarn --immutable yarn lint:ci @@ -34,18 +43,18 @@ yarn build if [ "${BUILD_DOCS_JS}" == "ON" ]; then # If apache or upstream are defined use those as remote. # Otherwise use origin which could be a fork on PRs. 
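The remote-selection comment just above ("If apache or upstream are defined use those as remote. Otherwise use origin...") is implemented by the `if`/`elif` chain in the next hunk. The same decision as a Python sketch (the function name is hypothetical; it returns the `--gitRemote` value `yarn doc` would receive, `"origin"` for the default-remote case, or `None` when docs publishing should be skipped):

```python
import subprocess
from pathlib import PurePosixPath
from typing import Optional

def doc_git_remote(repo: str) -> Optional[str]:
    def url(remote: str) -> str:
        res = subprocess.run(
            ["git", "-C", repo, "config", "--get", f"remote.{remote}.url"],
            capture_output=True, text=True)
        return res.stdout.strip()

    if url("apache") == "git@github.com:apache/arrow.git":
        return "apache"
    # The shell uses a substring regex match (=~) here.
    if "https://github.com/apache/arrow" in url("upstream"):
        return "upstream"
    # Mirrors `basename -s .git $(git config --get remote.origin.url)`.
    if PurePosixPath(url("origin")).name.removesuffix(".git") == "arrow":
        return "origin"
    return None
```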
- if [ "$(git config --get remote.apache.url)" == "git@github.com:apache/arrow.git" ]; then + if [ "$(git -C ${arrow_dir} config --get remote.apache.url)" == "git@github.com:apache/arrow.git" ]; then yarn doc --gitRemote apache - elif [[ "$(git config --get remote.upstream.url)" =~ "https://github.com/apache/arrow" ]]; then + elif [[ "$(git -C ${arrow_dir}config --get remote.upstream.url)" =~ "https://github.com/apache/arrow" ]]; then yarn doc --gitRemote upstream - elif [[ "$(basename -s .git $(git config --get remote.origin.url))" == "arrow" ]]; then + elif [[ "$(basename -s .git $(git -C ${arrow_dir} config --get remote.origin.url))" == "arrow" ]]; then yarn doc else echo "Failed to build docs because the remote is not set correctly. Please set the origin or upstream remote to https://github.com/apache/arrow.git or the apache remote to git@github.com:apache/arrow.git." exit 0 fi mkdir -p ${build_dir}/docs/js - rsync -a ${arrow_dir}/js/doc/ ${build_dir}/docs/js + rsync -a doc/ ${build_dir}/docs/js fi popd diff --git a/ci/scripts/js_test.sh b/ci/scripts/js_test.sh index 40de974ede161..863b1c3d34613 100755 --- a/ci/scripts/js_test.sh +++ b/ci/scripts/js_test.sh @@ -20,8 +20,9 @@ set -ex source_dir=${1}/js +build_dir=${2}/js -pushd ${source_dir} +pushd ${build_dir} yarn lint yarn test diff --git a/ci/scripts/nanoarrow_build.sh b/ci/scripts/nanoarrow_build.sh new file mode 100755 index 0000000000000..1612b9a2d0102 --- /dev/null +++ b/ci/scripts/nanoarrow_build.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +arrow_dir=${1} +source_dir=${1}/nanoarrow +build_dir=${2}/nanoarrow + +# This file is used to build the nanoarrow binaries needed for the archery +# integration tests. Testing of the nanoarrow implementation in normal CI is handled +# by github workflows in the arrow-nanoarrow repository. + +if [ "${ARCHERY_INTEGRATION_WITH_NANOARROW}" -eq "0" ]; then + echo "=====================================================================" + echo "Not building nanoarrow" + echo "=====================================================================" + exit 0; +elif [ ! -d "${source_dir}" ]; then + echo "=====================================================================" + echo "The nanoarrow source is missing. Please clone the arrow-nanoarrow repository" + echo "to arrow/nanoarrow before running the integration tests:" + echo " git clone https://github.com/apache/arrow-nanoarrow.git path/to/arrow/nanoarrow" + echo "=====================================================================" + exit 1; +fi + +set -x + +mkdir -p ${build_dir} +pushd ${build_dir} + +cmake ${source_dir} -DNANOARROW_BUILD_INTEGRATION_TESTS=ON +cmake --build . 
+ +popd diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index 99153cdf75539..9455baf353633 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -78,17 +78,42 @@ export PYARROW_PARALLEL=${n_jobs} export CMAKE_PREFIX_PATH export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} -pushd ${source_dir} +# https://github.com/apache/arrow/issues/41429 +# TODO: We want to out-of-source build. This is a workaround. We copy +# all needed files to the build directory from the source directory +# and build in the build directory. +rm -rf ${python_build_dir} +cp -aL ${source_dir} ${python_build_dir} +pushd ${python_build_dir} # - Cannot call setup.py as it may install in the wrong directory # on Debian/Ubuntu (ARROW-15243). # - Cannot use build isolation as we want to use specific dependency versions # (e.g. Numpy, Pandas) on some CI jobs. ${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv . -# Remove build artifacts from source directory -find build/ -user root -delete popd if [ "${BUILD_DOCS_PYTHON}" == "ON" ]; then + # https://github.com/apache/arrow/issues/41429 + # TODO: We want to out-of-source build. This is a workaround. + # + # Copy docs/source because the "autosummary_generate = True" + # configuration generates files to docs/source/python/generated/. + rm -rf ${python_build_dir}/docs/source + mkdir -p ${python_build_dir}/docs + cp -a ${arrow_dir}/docs/source ${python_build_dir}/docs/ + rm -rf ${python_build_dir}/format + cp -a ${arrow_dir}/format ${python_build_dir}/ + rm -rf ${python_build_dir}/cpp/examples + mkdir -p ${python_build_dir}/cpp + cp -a ${arrow_dir}/cpp/examples ${python_build_dir}/cpp/ + rm -rf ${python_build_dir}/ci + cp -a ${arrow_dir}/ci/ ${python_build_dir}/ ncpus=$(python -c "import os; print(os.cpu_count())") - sphinx-build -b html -j ${ncpus} ${arrow_dir}/docs/source ${build_dir}/docs + export ARROW_CPP_DOXYGEN_XML=${build_dir}/cpp/apidoc/xml + pushd ${build_dir} + sphinx-build \ + -b html \ + ${python_build_dir}/docs/source \ + ${build_dir}/docs + popd fi diff --git a/ci/scripts/r_build.sh b/ci/scripts/r_build.sh index 38b54e4434036..f4dc5a5781c6e 100755 --- a/ci/scripts/r_build.sh +++ b/ci/scripts/r_build.sh @@ -24,15 +24,29 @@ build_dir=${2} : ${BUILD_DOCS_R:=OFF} -pushd ${source_dir} +# https://github.com/apache/arrow/issues/41429 +# TODO: We want to out-of-source build. This is a workaround. We copy +# all needed files to the build directory from the source directory +# and build in the build directory. +rm -rf ${build_dir}/r +cp -aL ${source_dir} ${build_dir}/r +pushd ${build_dir}/r # build first so that any stray compiled files in r/src are ignored ${R_BIN} CMD build . -${R_BIN} CMD INSTALL ${INSTALL_ARGS} arrow*.tar.gz +if [ -x "$(command -v sudo)" ]; then + SUDO=sudo +else + SUDO= +fi +${SUDO} \ + env \ + PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig:${PKG_CONFIG_PATH} \ + ${R_BIN} CMD INSTALL ${INSTALL_ARGS} arrow*.tar.gz if [ "${BUILD_DOCS_R}" == "ON" ]; then ${R_BIN} -e "pkgdown::build_site(install = FALSE)" - rsync -a ${source_dir}/docs/ ${build_dir}/docs/r + rsync -a docs/ ${build_dir}/docs/r fi popd diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh index 52db2e6df6611..8a962fe576cbb 100755 --- a/ci/scripts/r_docker_configure.sh +++ b/ci/scripts/r_docker_configure.sh @@ -67,26 +67,6 @@ sloppiness = include_file_ctime hash_dir = false" >> ~/.ccache/ccache.conf fi -# Special hacking to try to reproduce quirks on centos using non-default build -# tooling. 
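Both `cpp_build.sh` (before `ldconfig`) and `r_build.sh` above (before `R CMD INSTALL`) now use the same detect-`sudo` idiom: the Debian docs image runs as the non-root `arrow` user with passwordless sudo, while other images run as root with no sudo installed. The idiom in a few lines of Python (the helper name is hypothetical):

```python
import shutil
import subprocess

def run_privileged(cmd):
    # Prefix with sudo only when sudo exists (non-root user with passwordless
    # sudo, as in the docs image); otherwise assume we already run as root.
    prefix = ["sudo"] if shutil.which("sudo") else []
    subprocess.run(prefix + cmd, check=True)

# e.g. what cpp_build.sh does after installing the C++ libraries:
# run_privileged(["ldconfig", "/usr/local/lib"])
```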
-if [[ -n "$DEVTOOLSET_VERSION" ]]; then - $PACKAGE_MANAGER install -y centos-release-scl - $PACKAGE_MANAGER install -y "devtoolset-$DEVTOOLSET_VERSION" - - # Enable devtoolset here so that `which gcc` finds the right compiler below - source /opt/rh/devtoolset-${DEVTOOLSET_VERSION}/enable - - # Build images which require the devtoolset don't have CXX17 variables - # set as the system compiler doesn't support C++17 - if [ ! "`{R_BIN} CMD config CXX17`" ]; then - mkdir -p ~/.R - echo "CC = $(which gcc) -fPIC" >> ~/.R/Makevars - echo "CXX17 = $(which g++) -fPIC" >> ~/.R/Makevars - echo "CXX17STD = -std=c++17" >> ~/.R/Makevars - echo "CXX17FLAGS = ${CXX11FLAGS}" >> ~/.R/Makevars - fi -fi - if [ -f "${ARROW_SOURCE_HOME}/ci/scripts/r_install_system_dependencies.sh" ]; then "${ARROW_SOURCE_HOME}/ci/scripts/r_install_system_dependencies.sh" fi diff --git a/ci/scripts/r_install_system_dependencies.sh b/ci/scripts/r_install_system_dependencies.sh index be0d75ef235e6..7ddc2604f661a 100755 --- a/ci/scripts/r_install_system_dependencies.sh +++ b/ci/scripts/r_install_system_dependencies.sh @@ -21,29 +21,30 @@ set -ex : ${ARROW_SOURCE_HOME:=/arrow} -if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then - # Figure out what package manager we have - if [ "`which dnf`" ]; then - PACKAGE_MANAGER=dnf - elif [ "`which yum`" ]; then - PACKAGE_MANAGER=yum - elif [ "`which zypper`" ]; then - PACKAGE_MANAGER=zypper - else - PACKAGE_MANAGER=apt-get - apt-get update - fi +# Figure out what package manager we have +if [ "`which dnf`" ]; then + PACKAGE_MANAGER=dnf +elif [ "`which yum`" ]; then + PACKAGE_MANAGER=yum +elif [ "`which zypper`" ]; then + PACKAGE_MANAGER=zypper +else + PACKAGE_MANAGER=apt-get + apt-get update +fi - # Install curl and OpenSSL for S3/GCS support - case "$PACKAGE_MANAGER" in - apt-get) - apt-get install -y libcurl4-openssl-dev libssl-dev - ;; - *) - $PACKAGE_MANAGER install -y libcurl-devel openssl-devel - ;; - esac +# Install curl and OpenSSL (technically, only needed for S3/GCS support, but +# installing the R curl package fails without it) +case "$PACKAGE_MANAGER" in + apt-get) + apt-get install -y libcurl4-openssl-dev libssl-dev + ;; + *) + $PACKAGE_MANAGER install -y libcurl-devel openssl-devel + ;; +esac +if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then # The Dockerfile should have put this file here if [ "$ARROW_S3" == "ON" ] && [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" latest /usr/local diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh index 600ee0fa2cbe5..fb3e9a5836387 100755 --- a/ci/scripts/r_sanitize.sh +++ b/ci/scripts/r_sanitize.sh @@ -46,10 +46,12 @@ unset ARROW_R_DEV export ARROW_R_VERBOSE_TEST=TRUE export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp" +# From the old rhub image https://github.com/r-hub/rhub-linux-builders/blob/master/fedora-clang-devel-san/Dockerfile +export ASAN_OPTIONS="alloc_dealloc_mismatch=0:detect_leaks=0:detect_odr_violation=0" # run tests pushd tests -${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } +${R_BIN} --no-save < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } cat testthat.out if grep -q "runtime error" testthat.out; then @@ -58,7 +60,7 @@ fi # run examples popd -${R_BIN} -e 'library(arrow); testthat::test_examples(".")' >> examples.out 2>&1 || { cat examples.out; exit 1; } +${R_BIN} 
--no-save -e 'library(arrow); testthat::test_examples(".")' >> examples.out 2>&1 || { cat examples.out; exit 1; } cat examples.out if grep -q "runtime error" examples.out; then diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh index 72078ab3c06c2..fe9d18edb8cbb 100755 --- a/ci/scripts/r_test.sh +++ b/ci/scripts/r_test.sh @@ -46,7 +46,9 @@ if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} export R_LD_LIBRARY_PATH=${LD_LIBRARY_PATH} fi -export _R_CHECK_COMPILATION_FLAGS_KNOWN_=${ARROW_R_CXXFLAGS} + +export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} ${ARROW_R_CXXFLAGS}" + if [ "$ARROW_R_DEV" = "TRUE" ]; then # These are sometimes used in the Arrow C++ build and are not a problem export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-attributes -msse4.2 -Wno-noexcept-type -Wno-subobject-linkage" @@ -108,16 +110,15 @@ SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true') on.exit(tools::pskill(pid_flight), add = TRUE) } - run_donttest <- identical(tolower(Sys.getenv('_R_CHECK_DONTTEST_EXAMPLES_', 'true')), 'true') - if (run_donttest) { - args <- c(args, '--run-donttest') - } - install_args <- Sys.getenv('INSTALL_ARGS') if (nzchar(install_args)) { args <- c(args, paste0('--install-args=\"', install_args, '\"')) } + message('Running rcmdcheck with:\n') + print(build_args) + print(args) + rcmdcheck::rcmdcheck(build_args = build_args, args = args, error_on = 'warning', check_dir = 'check', timeout = 3600)" echo "$SCRIPT" | ${R_BIN} --no-save diff --git a/ci/scripts/r_valgrind.sh b/ci/scripts/r_valgrind.sh index a14cb803ca898..0e40d792111c4 100755 --- a/ci/scripts/r_valgrind.sh +++ b/ci/scripts/r_valgrind.sh @@ -33,7 +33,7 @@ ${R_BIN} CMD INSTALL ${INSTALL_ARGS} arrow*.tar.gz pushd tests # to generate suppression files run: -# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testthat.supp +# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testthat.R ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out # valgrind --error-exitcode=1 should return an erroring exit code that we can catch, diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index df83f56dd2f70..679842c31e0b1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -431,10 +431,6 @@ endif() # Compiler flags # -if(ARROW_NO_DEPRECATED_API) - add_definitions(-DARROW_NO_DEPRECATED_API) -endif() - if(ARROW_EXTRA_ERROR_CONTEXT) add_definitions(-DARROW_EXTRA_ERROR_CONTEXT) endif() diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile index e19c933cd454f..5be93032c00d9 100644 --- a/cpp/apidoc/Doxyfile +++ b/cpp/apidoc/Doxyfile @@ -2168,16 +2168,17 @@ INCLUDE_FILE_PATTERNS = PREDEFINED = __attribute__(x)= \ __declspec(x)= \ - PARQUET_EXPORT= \ - GANDIVA_EXPORT= \ - ARROW_EXPORT= \ ARROW_ACERO_EXPORT= \ + ARROW_ARG_UNUSED(x)=x \ + ARROW_DEPRECATED(x)= \ ARROW_DS_EXPORT= \ ARROW_ENGINE_EXPORT= \ + ARROW_EXPORT= \ + ARROW_EXTERN_TEMPLATE= \ ARROW_FLIGHT_EXPORT= \ ARROW_FLIGHT_SQL_EXPORT= \ - ARROW_EXTERN_TEMPLATE= \ - ARROW_DEPRECATED(x)= + GANDIVA_EXPORT= \ + PARQUET_EXPORT= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be 
expanded. The diff --git a/cpp/build-support/update-thrift.sh b/cpp/build-support/update-thrift.sh index 9b8f2539cffe3..9e050a5e49d64 100755 --- a/cpp/build-support/update-thrift.sh +++ b/cpp/build-support/update-thrift.sh @@ -20,4 +20,4 @@ # Run this from cpp/ directory. thrift is expected to be in your path -thrift --gen cpp:moveable_types -out src/generated src/parquet/parquet.thrift +thrift --gen cpp:moveable_types,templates -out src/generated src/parquet/parquet.thrift diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index dc0e5da63adb7..41466a1c22404 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -158,8 +158,6 @@ if(ARROW_DEFINE_OPTIONS) define_option_string(ARROW_GIT_DESCRIPTION "The Arrow git commit description (if any)" "") - define_option(ARROW_NO_DEPRECATED_API "Exclude deprecated APIs from build" OFF) - define_option(ARROW_POSITION_INDEPENDENT_CODE "Whether to create position-independent target" ON) diff --git a/cpp/cmake_modules/FindProtobufAlt.cmake b/cpp/cmake_modules/FindProtobufAlt.cmake index 15fe1b4f27ef7..703e05c4731b6 100644 --- a/cpp/cmake_modules/FindProtobufAlt.cmake +++ b/cpp/cmake_modules/FindProtobufAlt.cmake @@ -28,12 +28,27 @@ endif() if(ProtobufAlt_FIND_QUIETLY) list(APPEND find_package_args QUIET) endif() -find_package(Protobuf ${find_package_args}) -set(ProtobufAlt_FOUND ${Protobuf_FOUND}) +find_package(protobuf CONFIG ${find_package_args}) +set(ProtobufAlt_FOUND ${protobuf_FOUND}) if(ProtobufAlt_FOUND) - set(ProtobufAlt_VERSION ${Protobuf_VERSION}) - set(ProtobufAlt_VERSION_MAJOR ${Protobuf_VERSION_MAJOR}) - set(ProtobufAlt_VERSION_MINOR ${Protobuf_VERSION_MINOR}) - set(ProtobufAlt_VERSION_PATCH ${Protobuf_VERSION_PATCH}) - set(ProtobufAlt_VERSION_TWEEK ${Protobuf_VERSION_TWEEK}) + if(Protobuf_PROTOC_EXECUTABLE) + # work around https://github.com/protocolbuffers/protobuf/issues/14576 + set_target_properties(protobuf::protoc PROPERTIES IMPORTED_LOCATION_RELEASE + "${Protobuf_PROTOC_EXECUTABLE}") + endif() + set(ProtobufAlt_VERSION ${protobuf_VERSION}) + set(ProtobufAlt_VERSION_MAJOR ${protobuf_VERSION_MAJOR}) + set(ProtobufAlt_VERSION_MINOR ${protobuf_VERSION_MINOR}) + set(ProtobufAlt_VERSION_PATCH ${protobuf_VERSION_PATCH}) + set(ProtobufAlt_VERSION_TWEEK ${protobuf_VERSION_TWEEK}) +else() + find_package(Protobuf ${find_package_args}) + set(ProtobufAlt_FOUND ${Protobuf_FOUND}) + if(ProtobufAlt_FOUND) + set(ProtobufAlt_VERSION ${Protobuf_VERSION}) + set(ProtobufAlt_VERSION_MAJOR ${Protobuf_VERSION_MAJOR}) + set(ProtobufAlt_VERSION_MINOR ${Protobuf_VERSION_MINOR}) + set(ProtobufAlt_VERSION_PATCH ${Protobuf_VERSION_PATCH}) + set(ProtobufAlt_VERSION_TWEEK ${Protobuf_VERSION_TWEEK}) + endif() endif() diff --git a/cpp/cmake_modules/FindorcAlt.cmake b/cpp/cmake_modules/FindorcAlt.cmake index 289416678ad39..ce8cd11b4c3f0 100644 --- a/cpp/cmake_modules/FindorcAlt.cmake +++ b/cpp/cmake_modules/FindorcAlt.cmake @@ -71,4 +71,5 @@ if(orcAlt_FOUND) PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}") endif() + set(orcAlt_VERSION ${ORC_VERSION}) endif() diff --git a/cpp/cmake_modules/Findutf8proc.cmake b/cpp/cmake_modules/Findutf8proc.cmake index e347414090549..9721f76f0631b 100644 --- a/cpp/cmake_modules/Findutf8proc.cmake +++ b/cpp/cmake_modules/Findutf8proc.cmake @@ -19,7 +19,7 @@ if(utf8proc_FOUND) return() endif() -if(ARROW_PACKAGE_KIND STREQUAL "vcpkg") +if(ARROW_PACKAGE_KIND STREQUAL "vcpkg" OR VCPKG_TOOLCHAIN) 
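The rewritten `FindProtobufAlt.cmake` above now tries protobuf's own CMake config package first and only falls back to CMake's legacy `FindProtobuf` module when that fails; the assumption (worth verifying against your Protobuf version) is that recent Protobuf releases are reliably found only via their config files. The lookup order as a tiny Python sketch, with `find_package` as a stand-in callable for CMake's `find_package()`:

```python
def resolve_protobuf(find_package):
    """Config package first, legacy module second; `find_package` returns a
    version string on success or None on failure."""
    version = find_package("protobuf", mode="CONFIG")
    if version is not None:
        return ("config", version)
    version = find_package("Protobuf", mode="MODULE")
    if version is not None:
        return ("module", version)
    return None

# A host where only the legacy module finds protobuf:
fake = lambda name, mode: "3.21.12" if mode == "MODULE" else None
assert resolve_protobuf(fake) == ("module", "3.21.12")
```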
set(find_package_args "") if(utf8proc_FIND_VERSION) list(APPEND find_package_args ${utf8proc_FIND_VERSION}) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index d56609c123968..e2e1c4412abd0 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -314,6 +314,7 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DARROW_WARN_DOCUMENTATION") if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # size_t is 32 bit in Emscripten wasm32 - ignore conversion errors set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-shorten-64-to-32") @@ -329,8 +330,9 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-conversion") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-sign-conversion") - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wunused-result") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdate-time") + string(APPEND CXX_ONLY_FLAGS " -Wredundant-move") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wunused-result") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") if(WIN32) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index ddea1c399cbba..3c58ba649c4dd 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1263,7 +1263,7 @@ endif() # - S3FS and Flight benchmarks need Boost at runtime. if(ARROW_BUILD_INTEGRATION OR ARROW_BUILD_TESTS - OR (ARROW_FLIGHT AND ARROW_BUILD_BENCHMARKS) + OR (ARROW_FLIGHT AND (ARROW_TESTING OR ARROW_BUILD_BENCHMARKS)) OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS)) set(ARROW_USE_BOOST TRUE) set(ARROW_BOOST_REQUIRE_LIBRARY TRUE) @@ -2819,11 +2819,13 @@ macro(build_utf8proc) endmacro() if(ARROW_WITH_UTF8PROC) - resolve_dependency(utf8proc - PC_PACKAGE_NAMES - libutf8proc - REQUIRED_VERSION - "2.2.0") + set(utf8proc_resolve_dependency_args utf8proc PC_PACKAGE_NAMES libutf8proc) + if(NOT VCPKG_TOOLCHAIN) + # utf8proc in vcpkg doesn't provide version information: + # https://github.com/microsoft/vcpkg/issues/39176 + list(APPEND utf8proc_resolve_dependency_args REQUIRED_VERSION "2.2.0") + endif() + resolve_dependency(${utf8proc_resolve_dependency_args}) endif() macro(build_cares) @@ -4522,7 +4524,7 @@ macro(build_orc) "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" "-DSNAPPY_LIBRARY=$" "-DLZ4_LIBRARY=$" - "-DLZ4_STATIC_LIBRARY=$" + "-DLZ4_STATIC_LIB=$" "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" "-DZSTD_HOME=${ORC_ZSTD_ROOT}" @@ -5348,9 +5350,3 @@ if(ARROW_WITH_UCX) endif() message(STATUS "All bundled static libraries: ${ARROW_BUNDLED_STATIC_LIBS}") - -# Write out the package configurations. 
- -configure_file("src/arrow/util/config.h.cmake" "src/arrow/util/config.h" ESCAPE_QUOTES) -install(FILES "${ARROW_BINARY_DIR}/src/arrow/util/config.h" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/util") diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 2ef82dd614f84..5bcd4625b3b67 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -200,22 +200,29 @@ function(arrow_add_object_library PREFIX) set(SOURCES ${ARGN}) string(TOLOWER "${PREFIX}" prefix) if(WIN32) - add_library(${prefix}_shared OBJECT ${SOURCES}) - add_library(${prefix}_static OBJECT ${SOURCES}) - set_target_properties(${prefix}_shared PROPERTIES POSITION_INDEPENDENT_CODE ON) - set_target_properties(${prefix}_static PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_compile_definitions(${prefix}_shared PRIVATE ARROW_EXPORTING) - target_compile_definitions(${prefix}_static PRIVATE ARROW_STATIC) - target_compile_features(${prefix}_shared PRIVATE cxx_std_17) - target_compile_features(${prefix}_static PRIVATE cxx_std_17) - set(${PREFIX}_TARGET_SHARED - ${prefix}_shared - PARENT_SCOPE) - set(${PREFIX}_TARGET_STATIC - ${prefix}_static - PARENT_SCOPE) + set(targets) + if(ARROW_BUILD_SHARED) + add_library(${prefix}_shared OBJECT ${SOURCES}) + set_target_properties(${prefix}_shared PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(${prefix}_shared PRIVATE ARROW_EXPORTING) + target_compile_features(${prefix}_shared PRIVATE cxx_std_17) + set(${PREFIX}_TARGET_SHARED + ${prefix}_shared + PARENT_SCOPE) + list(APPEND targets ${prefix}_shared) + endif() + if(ARROW_BUILD_STATIC) + add_library(${prefix}_static OBJECT ${SOURCES}) + set_target_properties(${prefix}_static PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(${prefix}_static PRIVATE ARROW_STATIC) + target_compile_features(${prefix}_static PRIVATE cxx_std_17) + set(${PREFIX}_TARGET_STATIC + ${prefix}_static + PARENT_SCOPE) + list(APPEND targets ${prefix}_static) + endif() set(${PREFIX}_TARGETS - ${prefix}_shared ${prefix}_static + ${targets} PARENT_SCOPE) else() add_library(${prefix} OBJECT ${SOURCES}) @@ -351,6 +358,12 @@ macro(append_runtime_avx512_src SRCS SRC) endif() endmacro() +# Write out compile-time configuration constants +configure_file("util/config.h.cmake" "util/config.h" ESCAPE_QUOTES) +configure_file("util/config_internal.h.cmake" "util/config_internal.h" ESCAPE_QUOTES) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/util/config.h" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/util") + set(ARROW_SRCS builder.cc buffer.cc @@ -501,6 +514,7 @@ set(ARROW_UTIL_SRCS util/decimal.cc util/delimiting.cc util/dict_util.cc + util/fixed_width_internal.cc util/float16.cc util/formatting.cc util/future.cc @@ -636,6 +650,7 @@ endif() set(ARROW_TESTING_SRCS io/test_common.cc ipc/test_common.cc + testing/fixed_width_test_util.cc testing/gtest_util.cc testing/random.cc testing/generator.cc @@ -715,7 +730,8 @@ set(ARROW_COMPUTE_SRCS compute/row/compare_internal.cc compute/row/grouper.cc compute/row/row_internal.cc - compute/util.cc) + compute/util.cc + compute/util_internal.cc) append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc) append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc) diff --git a/cpp/src/arrow/acero/CMakeLists.txt b/cpp/src/arrow/acero/CMakeLists.txt index 31ed4a6a69b6a..73079059f1dfd 100644 --- a/cpp/src/arrow/acero/CMakeLists.txt +++ b/cpp/src/arrow/acero/CMakeLists.txt @@ -173,13 +173,8 @@ 
add_arrow_acero_test(hash_join_node_test SOURCES hash_join_node_test.cc bloom_filter_test.cc) add_arrow_acero_test(pivot_longer_node_test SOURCES pivot_longer_node_test.cc) -# asof_join_node and sorted_merge_node use std::thread internally -# and doesn't use ThreadPool so it will -# be broken if threading is turned off -if(ARROW_ENABLE_THREADING) - add_arrow_acero_test(asof_join_node_test SOURCES asof_join_node_test.cc) - add_arrow_acero_test(sorted_merge_node_test SOURCES sorted_merge_node_test.cc) -endif() +add_arrow_acero_test(asof_join_node_test SOURCES asof_join_node_test.cc) +add_arrow_acero_test(sorted_merge_node_test SOURCES sorted_merge_node_test.cc) add_arrow_acero_test(tpch_node_test SOURCES tpch_node_test.cc) add_arrow_acero_test(union_node_test SOURCES union_node_test.cc) @@ -228,9 +223,7 @@ if(ARROW_BUILD_BENCHMARKS) add_arrow_acero_benchmark(project_benchmark SOURCES benchmark_util.cc project_benchmark.cc) - if(ARROW_ENABLE_THREADING) - add_arrow_acero_benchmark(asof_join_benchmark SOURCES asof_join_benchmark.cc) - endif() + add_arrow_acero_benchmark(asof_join_benchmark SOURCES asof_join_benchmark.cc) add_arrow_acero_benchmark(tpch_benchmark SOURCES tpch_benchmark.cc) @@ -253,9 +246,7 @@ if(ARROW_BUILD_BENCHMARKS) target_link_libraries(arrow-acero-expression-benchmark PUBLIC arrow_acero_static) target_link_libraries(arrow-acero-filter-benchmark PUBLIC arrow_acero_static) target_link_libraries(arrow-acero-project-benchmark PUBLIC arrow_acero_static) - if(ARROW_ENABLE_THREADING) - target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC arrow_acero_static) - endif() + target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC arrow_acero_static) target_link_libraries(arrow-acero-tpch-benchmark PUBLIC arrow_acero_static) if(ARROW_BUILD_OPENMP_BENCHMARKS) target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC arrow_acero_static) @@ -264,9 +255,7 @@ if(ARROW_BUILD_BENCHMARKS) target_link_libraries(arrow-acero-expression-benchmark PUBLIC arrow_acero_shared) target_link_libraries(arrow-acero-filter-benchmark PUBLIC arrow_acero_shared) target_link_libraries(arrow-acero-project-benchmark PUBLIC arrow_acero_shared) - if(ARROW_ENABLE_THREADING) - target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC arrow_acero_shared) - endif() + target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC arrow_acero_shared) target_link_libraries(arrow-acero-tpch-benchmark PUBLIC arrow_acero_shared) if(ARROW_BUILD_OPENMP_BENCHMARKS) target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC arrow_acero_shared) diff --git a/cpp/src/arrow/acero/aggregate_internal.cc b/cpp/src/arrow/acero/aggregate_internal.cc index 9c4b7fe5ae98c..0c1bc3db365a6 100644 --- a/cpp/src/arrow/acero/aggregate_internal.cc +++ b/cpp/src/arrow/acero/aggregate_internal.cc @@ -102,7 +102,7 @@ Result> InitKernel(const HashAggregateKernel* kerne ARROW_ASSIGN_OR_RAISE( auto state, kernel->init(&kernel_ctx, KernelInitArgs{kernel, aggr_in_types, options})); - return std::move(state); + return state; } Result> GetKernels( @@ -129,7 +129,7 @@ Result>> InitKernels( ARROW_ASSIGN_OR_RAISE(states[i], InitKernel(kernels[i], ctx, aggregates[i], in_types[i])); } - return std::move(states); + return states; } Result ResolveKernels( @@ -242,7 +242,7 @@ Result> ExtractValues(const ExecBatch& input_batch, DCHECK(false); } } - return std::move(values); + return values; } } // namespace aggregate diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 48cc83dd3d6a9..848cbdf7506ad 100644 
--- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -548,8 +548,10 @@ class InputState { // true when the queue is empty and, when memo may have future entries (the case of a // positive tolerance), when the memo is empty. // used when checking whether RHS is up to date with LHS. - bool CurrentEmpty() const { - return memo_.no_future_ ? Empty() : memo_.times_.empty() && Empty(); + // NOTE: The emptiness must be decided by a single call to Empty() in caller, due to the + // potential race with Push(), see GH-41614. + bool CurrentEmpty(bool empty) const { + return memo_.no_future_ ? empty : (memo_.times_.empty() && empty); } // in case memo may not have future entries (the case of a non-positive tolerance), @@ -650,13 +652,15 @@ class InputState { // timestamp, update latest_time and latest_ref_row to the value that immediately pass // the horizon. Update the memo-store with any entries or future entries so observed. // Returns true if updates were made, false if not. - Result AdvanceAndMemoize(OnType ts) { + // NOTE: The emptiness must be decided by a single call to Empty() in caller, due to the + // potential race with Push(), see GH-41614. + Result AdvanceAndMemoize(OnType ts, bool empty) { // Advance the right side row index until we reach the latest right row (for each key) // for the given left timestamp. DEBUG_SYNC(node_, "Advancing input ", index_, DEBUG_MANIP(std::endl)); // Check if already updated for TS (or if there is no latest) - if (Empty()) { // can't advance if empty and no future entries + if (empty) { // can't advance if empty and no future entries return memo_.no_future_ ? false : memo_.RemoveEntriesWithLesserTime(ts); } @@ -918,34 +922,46 @@ class CompositeTableBuilder { // guaranteeing this probability is below 1 in a billion. The fix is 128-bit hashing. // See ARROW-17653 class AsofJoinNode : public ExecNode { - // Advances the RHS as far as possible to be up to date for the current LHS timestamp - Result UpdateRhs() { + // A simple wrapper for the result of a single call to UpdateRhs(), identifying: + // 1) If any RHS has advanced. + // 2) If all RHS are up to date with LHS. + struct RhsUpdateState { + bool any_advanced; + bool all_up_to_date_with_lhs; + }; + // Advances the RHS as far as possible to be up to date for the current LHS timestamp, + // and checks if all RHS are up to date with LHS. The reason they have to be performed + // together is that they both depend on the emptiness of the RHS, which can be changed + // by Push() executing in another thread. + Result UpdateRhs() { auto& lhs = *state_.at(0); auto lhs_latest_time = lhs.GetLatestTime(); - bool any_updated = false; - for (size_t i = 1; i < state_.size(); ++i) { - ARROW_ASSIGN_OR_RAISE(bool advanced, state_[i]->AdvanceAndMemoize(lhs_latest_time)); - any_updated |= advanced; - } - return any_updated; - } - - // Returns false if RHS not up to date for LHS - bool IsUpToDateWithLhsRow() const { - auto& lhs = *state_[0]; - if (lhs.Empty()) return false; // can't proceed if nothing on the LHS - OnType lhs_ts = lhs.GetLatestTime(); + RhsUpdateState update_state{/*any_advanced=*/false, /*all_up_to_date_with_lhs=*/true}; for (size_t i = 1; i < state_.size(); ++i) { auto& rhs = *state_[i]; - if (!rhs.Finished()) { + + // Obtain RHS emptiness once for subsequent AdvanceAndMemoize() and CurrentEmpty(). + bool rhs_empty = rhs.Empty(); + // Obtain RHS current time here because AdvanceAndMemoize() can change the + // emptiness. + OnType rhs_current_time = rhs_empty ? 
OnType{} : rhs.GetLatestTime(); + + ARROW_ASSIGN_OR_RAISE(bool advanced, + rhs.AdvanceAndMemoize(lhs_latest_time, rhs_empty)); + update_state.any_advanced |= advanced; + + if (update_state.all_up_to_date_with_lhs && !rhs.Finished()) { // If RHS is finished, then we know it's up to date - if (rhs.CurrentEmpty()) - return false; // RHS isn't finished, but is empty --> not up to date - if (lhs_ts > rhs.GetCurrentTime()) - return false; // RHS isn't up to date (and not finished) + if (rhs.CurrentEmpty(rhs_empty)) { + // RHS isn't finished, but is empty --> not up to date + update_state.all_up_to_date_with_lhs = false; + } else if (lhs_latest_time > rhs_current_time) { + // RHS isn't up to date (and not finished) + update_state.all_up_to_date_with_lhs = false; + } } } - return true; + return update_state; } Result> ProcessInner() { @@ -963,20 +979,19 @@ class AsofJoinNode : public ExecNode { // If LHS is finished or empty then there's nothing we can do here if (lhs.Finished() || lhs.Empty()) break; - // Advance each of the RHS as far as possible to be up to date for the LHS timestamp - ARROW_ASSIGN_OR_RAISE(bool any_rhs_advanced, UpdateRhs()); + ARROW_ASSIGN_OR_RAISE(auto rhs_update_state, UpdateRhs()); // If we have received enough inputs to produce the next output batch // (decided by IsUpToDateWithLhsRow), we will perform the join and // materialize the output batch. The join is done by advancing through // the LHS and adding joined row to rows_ (done by Emplace). Finally, // input batches that are no longer needed are removed to free up memory. - if (IsUpToDateWithLhsRow()) { + if (rhs_update_state.all_up_to_date_with_lhs) { dst.Emplace(state_, tolerance_); ARROW_ASSIGN_OR_RAISE(bool advanced, lhs.Advance()); if (!advanced) break; // if we can't advance LHS, we're done for this batch } else { - if (!any_rhs_advanced) break; // need to wait for new data + if (!rhs_update_state.any_advanced) break; // need to wait for new data } } @@ -999,6 +1014,8 @@ class AsofJoinNode : public ExecNode { } } +#ifdef ARROW_ENABLE_THREADING + template struct Defer { Callable callable; @@ -1085,6 +1102,7 @@ class AsofJoinNode : public ExecNode { } static void ProcessThreadWrapper(AsofJoinNode* node) { node->ProcessThread(); } +#endif public: AsofJoinNode(ExecPlan* plan, NodeVector inputs, std::vector input_labels, @@ -1116,8 +1134,10 @@ class AsofJoinNode : public ExecNode { } virtual ~AsofJoinNode() { - process_.Push(false); // poison pill +#ifdef ARROW_ENABLE_THREADING + PushProcess(false); process_thread_.join(); +#endif } const std::vector& indices_of_on_key() { return indices_of_on_key_; } @@ -1395,7 +1415,8 @@ class AsofJoinNode : public ExecNode { rb->ToString(), DEBUG_MANIP(std::endl)); ARROW_RETURN_NOT_OK(state_.at(k)->Push(rb)); - process_.Push(true); + PushProcess(true); + return Status::OK(); } @@ -1410,22 +1431,77 @@ class AsofJoinNode : public ExecNode { // The reason for this is that there are cases at the end of a table where we don't // know whether the RHS of the join is up-to-date until we know that the table is // finished. 
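The GH-41614 comments above boil down to a time-of-check/time-of-use rule: `Empty()` can be raced by `Push()` from another thread, so one processing step must sample emptiness once and reuse the snapshot for every decision. A simplified, compilable sketch of that contract, with hypothetical names:

```cpp
#include <atomic>
#include <cstdio>

// Sketch only: every check within one step shares a single Empty() snapshot,
// so a concurrent Push() cannot make the checks disagree with each other.
class RightInput {
 public:
  bool Empty() const { return size_.load(std::memory_order_acquire) == 0; }
  void Push() { size_.fetch_add(1, std::memory_order_release); }

  // Emptiness is passed in by the caller, mirroring the new
  // CurrentEmpty(bool) and AdvanceAndMemoize(ts, bool) signatures above.
  bool CurrentEmpty(bool empty) const { return empty; }

 private:
  std::atomic<int> size_{0};
};

int main() {
  RightInput rhs;
  const bool empty = rhs.Empty();  // sampled once per step...
  std::printf("current-empty: %d\n", rhs.CurrentEmpty(empty));  // ...reused
  return 0;
}
```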
- process_.Push(true); + PushProcess(true); + return Status::OK(); } + void PushProcess(bool value) { +#ifdef ARROW_ENABLE_THREADING + process_.Push(value); +#else + if (value) { + ProcessNonThreaded(); + } else if (!process_task_.is_finished()) { + EndFromSingleThread(); + } +#endif + } - Status StartProducing() override { #ifndef ARROW_ENABLE_THREADING - return Status::NotImplemented("ASOF join requires threading enabled"); + bool ProcessNonThreaded() { + while (!process_task_.is_finished()) { + Result> result = ProcessInner(); + + if (result.ok()) { + auto out_rb = *result; + if (!out_rb) break; + ExecBatch out_b(*out_rb); + out_b.index = batches_produced_++; + DEBUG_SYNC(this, "produce batch ", out_b.index, ":", DEBUG_MANIP(std::endl), + out_rb->ToString(), DEBUG_MANIP(std::endl)); + Status st = output_->InputReceived(this, std::move(out_b)); + if (!st.ok()) { + // this isn't really from a thread, + // but we call through to this for consistency + EndFromSingleThread(std::move(st)); + return false; + } + } else { + // this isn't really from a thread, + // but we call through to this for consistency + EndFromSingleThread(result.status()); + return false; + } + } + auto& lhs = *state_.at(0); + if (lhs.Finished() && !process_task_.is_finished()) { + EndFromSingleThread(Status::OK()); + } + return true; + } + + void EndFromSingleThread(Status st = Status::OK()) { + process_task_.MarkFinished(st); + if (st.ok()) { + st = output_->InputFinished(this, batches_produced_); + } + for (const auto& s : state_) { + st &= s->ForceShutdown(); + } + } + #endif + Status StartProducing() override { ARROW_ASSIGN_OR_RAISE(process_task_, plan_->query_context()->BeginExternalTask( "AsofJoinNode::ProcessThread")); if (!process_task_.is_valid()) { // Plan has already aborted. Do not start process thread return Status::OK(); } +#ifdef ARROW_ENABLE_THREADING process_thread_ = std::thread(&AsofJoinNode::ProcessThreadWrapper, this); +#endif return Status::OK(); } @@ -1433,8 +1509,10 @@ class AsofJoinNode : public ExecNode { void ResumeProducing(ExecNode* output, int32_t counter) override {} Status StopProducingImpl() override { +#ifdef ARROW_ENABLE_THREADING process_.Clear(); - process_.Push(false); +#endif + PushProcess(false); return Status::OK(); } @@ -1464,11 +1542,13 @@ class AsofJoinNode : public ExecNode { // Backpressure counter common to all inputs std::atomic backpressure_counter_; +#ifdef ARROW_ENABLE_THREADING // Queue for triggering processing of a given input // (a false value is a poison pill) ConcurrentQueue process_; // Worker thread std::thread process_thread_; +#endif Future<> process_task_; // In-progress batches produced @@ -1496,9 +1576,13 @@ AsofJoinNode::AsofJoinNode(ExecPlan* plan, NodeVector inputs, debug_os_(join_options.debug_opts ? join_options.debug_opts->os : nullptr), debug_mutex_(join_options.debug_opts ? 
join_options.debug_opts->mutex : nullptr), #endif - backpressure_counter_(1), + backpressure_counter_(1) +#ifdef ARROW_ENABLE_THREADING + , process_(), - process_thread_() { + process_thread_() +#endif +{ for (auto& key_hasher : key_hashers_) { key_hasher->node_ = this; } diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc index d95d2aaad3643..051e280a4c53c 100644 --- a/cpp/src/arrow/acero/asof_join_node_test.cc +++ b/cpp/src/arrow/acero/asof_join_node_test.cc @@ -1678,5 +1678,59 @@ TEST(AsofJoinTest, BackpressureWithBatchesGen) { /*slow_r0=*/false); } +// Reproduction of GH-40675: A logical race between Process() and Push() that can be more +// easily observed with single small batch. +TEST(AsofJoinTest, RhsEmptinessRace) { + auto left_batch = ExecBatchFromJSON( + {int64(), utf8()}, R"([[1, "a"], [1, "b"], [5, "a"], [6, "b"], [7, "f"]])"); + auto right_batch = ExecBatchFromJSON( + {int64(), utf8(), float64()}, R"([[2, "a", 1.0], [9, "b", 3.0], [15, "g", 5.0]])"); + + Declaration left{ + "exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("colA", int64()), field("col2", utf8())}), + {std::move(left_batch)})}; + Declaration right{ + "exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("colB", int64()), field("col3", utf8()), + field("colC", float64())}), + {std::move(right_batch)})}; + AsofJoinNodeOptions asof_join_opts({{{"colA"}, {{"col2"}}}, {{"colB"}, {{"col3"}}}}, 1); + Declaration asof_join{ + "asofjoin", {std::move(left), std::move(right)}, std::move(asof_join_opts)}; + + ASSERT_OK_AND_ASSIGN(auto result, DeclarationToExecBatches(std::move(asof_join))); + + auto exp_batch = ExecBatchFromJSON( + {int64(), utf8(), float64()}, + R"([[1, "a", 1.0], [1, "b", null], [5, "a", null], [6, "b", null], [7, "f", null]])"); + AssertExecBatchesEqualIgnoringOrder(result.schema, {exp_batch}, result.batches); +} + +// Reproduction of GH-41149: Another case of the same root cause as GH-40675, but with +// empty "by" columns. 
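Before the GH-41149 regression test that the comment above introduces, it helps to see the shape of the `ARROW_ENABLE_THREADING` fallback the node hunks above rely on: triggers go to the worker-thread queue when threading is available, and the same work runs inline on the caller otherwise. A compilable sketch with hypothetical member names:

```cpp
#include <queue>

// Simplified shape of PushProcess()/PushTask() above: queue a trigger for
// the dedicated process thread, or do the work inline in a -threading build.
class MergeNode {
 public:
  void PushTask(bool keep_going) {
#ifdef ARROW_ENABLE_THREADING
    process_queue_.push(keep_going);  // poller thread consumes this
#else
    if (keep_going) {
      PollOnce();  // single-threaded build: run the processing loop here
    }
#endif
  }

 private:
  void PollOnce() { /* drain ready inputs, emit merged batches */ }
#ifdef ARROW_ENABLE_THREADING
  std::queue<bool> process_queue_;
#endif
};
```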
+TEST(AsofJoinTest, RhsEmptinessRaceEmptyBy) { + auto left_batch = ExecBatchFromJSON({int64()}, R"([[1], [2], [3]])"); + auto right_batch = + ExecBatchFromJSON({utf8(), int64()}, R"([["Z", 2], ["B", 3], ["A", 4]])"); + + Declaration left{"exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("on", int64())}), + {std::move(left_batch)})}; + Declaration right{ + "exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("colVals", utf8()), field("on", int64())}), + {std::move(right_batch)})}; + AsofJoinNodeOptions asof_join_opts({{{"on"}, {}}, {{"on"}, {}}}, 1); + Declaration asof_join{ + "asofjoin", {std::move(left), std::move(right)}, std::move(asof_join_opts)}; + + ASSERT_OK_AND_ASSIGN(auto result, DeclarationToExecBatches(std::move(asof_join))); + + auto exp_batch = + ExecBatchFromJSON({int64(), utf8()}, R"([[1, "Z"], [2, "Z"], [3, "B"]])"); + AssertExecBatchesEqualIgnoringOrder(result.schema, {exp_batch}, result.batches); +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/backpressure_handler.h b/cpp/src/arrow/acero/backpressure_handler.h index 178272315d7fb..db6c3799354af 100644 --- a/cpp/src/arrow/acero/backpressure_handler.h +++ b/cpp/src/arrow/acero/backpressure_handler.h @@ -45,7 +45,7 @@ class BackpressureHandler { } BackpressureHandler backpressure_handler(input, low_threshold, high_threshold, std::move(backpressure_control)); - return std::move(backpressure_handler); + return backpressure_handler; } void Handle(size_t start_level, size_t end_level) { diff --git a/cpp/src/arrow/acero/exec_plan.cc b/cpp/src/arrow/acero/exec_plan.cc index 97119726d4b17..d9fb1942fccd8 100644 --- a/cpp/src/arrow/acero/exec_plan.cc +++ b/cpp/src/arrow/acero/exec_plan.cc @@ -128,7 +128,7 @@ struct ExecPlanImpl : public ExecPlan { Future<> scheduler_finished = arrow::util::AsyncTaskScheduler::Make( [this](arrow::util::AsyncTaskScheduler* async_scheduler) { QueryContext* ctx = query_context(); - RETURN_NOT_OK(ctx->Init(ctx->max_concurrency(), async_scheduler)); + RETURN_NOT_OK(ctx->Init(async_scheduler)); #ifdef ARROW_WITH_OPENTELEMETRY if (HasMetadata()) { diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc index 2626fd50379dd..743cb20d1960d 100644 --- a/cpp/src/arrow/acero/hash_aggregate_test.cc +++ b/cpp/src/arrow/acero/hash_aggregate_test.cc @@ -318,7 +318,7 @@ Result RunGroupBy(const BatchesWithSchema& input, { {"source", SourceNodeOptions{input.schema, input.gen(use_threads, /*slow=*/false)}}, - {"aggregate", AggregateNodeOptions{std::move(aggregates), std::move(keys), + {"aggregate", AggregateNodeOptions{aggregates, std::move(keys), std::move(segment_keys)}}, {"sink", SinkNodeOptions{&sink_gen}}, }) @@ -592,6 +592,12 @@ void TestSegments(std::unique_ptr& segmenter, const ExecSpan& batc ASSERT_EQ(expected_segment, segment); offset = segment.offset + segment.length; } + // Assert next is the last (empty) segment. 
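The trailing-segment assertions that continue below pin down a convention of `RowSegmenter::GetNextSegment()`: queried past the last row, it yields an empty segment that is both open and extending. A paraphrase of what the asserted fields mean, with a struct layout that mirrors the `{offset, length, is_open, extends}` initializers used in these tests:

```cpp
#include <cstdint>

// Field meanings inferred from the test initializers above.
struct Segment {
  int64_t offset;
  int64_t length;
  bool is_open;   // may continue into the next batch
  bool extends;   // continues the previous segment's group
};

bool IsTrailingSentinel(const Segment& s, int64_t batch_length) {
  return s.offset >= batch_length && s.length == 0 && s.is_open && s.extends;
}
```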
+ ASSERT_OK_AND_ASSIGN(auto segment, segmenter->GetNextSegment(batch, offset)); + ASSERT_GE(segment.offset, batch.length); + ASSERT_EQ(segment.length, 0); + ASSERT_TRUE(segment.is_open); + ASSERT_TRUE(segment.extends); } Result> MakeGrouper(const std::vector& key_types) { @@ -682,48 +688,142 @@ TEST(RowSegmenter, Basics) { } TEST(RowSegmenter, NonOrdered) { - std::vector types = {int32()}; - auto batch = ExecBatchFromJSON(types, "[[1], [1], [2], [1], [2]]"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 1, true, false}, - {5, 0, true, true}}); + { + std::vector types = {int32()}; + auto batch = ExecBatchFromJSON(types, "[[1], [1], [2], [1], [2]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 1, true, false}, + {5, 0, true, true}}); + } + { + std::vector types = {int32(), int32()}; + auto batch = ExecBatchFromJSON(types, "[[1, 1], [1, 1], [2, 2], [1, 2], [2, 2]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 1, true, false}, + {5, 0, true, true}}); + } } TEST(RowSegmenter, EmptyBatches) { - std::vector types = {int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[2], [2]]"), ExecBatchFromJSON(types, "[]"), - }; - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {}); - TestSegments(segmenter, ExecSpan(batches[1]), {}); - TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[3]), {}); - TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[5]), {}); - TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); - TestSegments(segmenter, ExecSpan(batches[7]), {}); + { + std::vector types = {int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[]"), ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[2], [2]]"), ExecBatchFromJSON(types, "[]"), + }; + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {}); + TestSegments(segmenter, ExecSpan(batches[1]), {}); + TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[3]), {}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[5]), {}); + TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[7]), {}); + } + { + std::vector types = {int32(), int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1, 1]]"), + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1, 1]]"), + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[2, 2], [2, 2]]"), + 
ExecBatchFromJSON(types, "[]"), + }; + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {}); + TestSegments(segmenter, ExecSpan(batches[1]), {}); + TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[3]), {}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[5]), {}); + TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[7]), {}); + } } TEST(RowSegmenter, MultipleSegments) { - std::vector types = {int32()}; - auto batch = ExecBatchFromJSON(types, "[[1], [1], [2], [5], [3], [3], [5], [5], [4]]"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 2, false, false}, - {6, 2, false, false}, - {8, 1, true, false}, - {9, 0, true, true}}); + { + std::vector types = {int32()}; + auto batch = + ExecBatchFromJSON(types, "[[1], [1], [2], [5], [3], [3], [5], [5], [4]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 2, false, false}, + {6, 2, false, false}, + {8, 1, true, false}, + {9, 0, true, true}}); + } + { + std::vector types = {int32(), int32()}; + auto batch = ExecBatchFromJSON( + types, + "[[1, 1], [1, 1], [2, 2], [5, 5], [3, 3], [3, 3], [5, 5], [5, 5], [4, 4]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 2, false, false}, + {6, 2, false, false}, + {8, 1, true, false}, + {9, 0, true, true}}); + } +} + +TEST(RowSegmenter, MultipleSegmentsMultipleBatches) { + { + std::vector types = {int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[[1], [2]]"), + ExecBatchFromJSON(types, "[[5], [3]]"), + ExecBatchFromJSON(types, "[[3], [5], [5]]"), ExecBatchFromJSON(types, "[[4]]")}; + + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[1]), + {{0, 1, false, true}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[2]), + {{0, 1, false, false}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[3]), + {{0, 1, false, true}, {1, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, false}}); + } + { + std::vector types = {int32(), int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[[1, 1]]"), + ExecBatchFromJSON(types, "[[1, 1], [2, 2]]"), + ExecBatchFromJSON(types, "[[5, 5], [3, 3]]"), + ExecBatchFromJSON(types, "[[3, 3], [5, 5], [5, 5]]"), + ExecBatchFromJSON(types, "[[4, 4]]")}; + + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[1]), + {{0, 1, false, true}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[2]), + {{0, 1, false, false}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[3]), + {{0, 1, false, true}, {1, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, false}}); + } } namespace { diff --git 
a/cpp/src/arrow/acero/hash_join.cc b/cpp/src/arrow/acero/hash_join.cc index 296b2c56e00f4..5aa70a23f7c9e 100644 --- a/cpp/src/arrow/acero/hash_join.cc +++ b/cpp/src/arrow/acero/hash_join.cc @@ -791,7 +791,7 @@ class HashJoinBasicImpl : public HashJoinImpl { Result> HashJoinImpl::MakeBasic() { std::unique_ptr impl{new HashJoinBasicImpl()}; - return std::move(impl); + return impl; } } // namespace acero diff --git a/cpp/src/arrow/acero/hash_join_benchmark.cc b/cpp/src/arrow/acero/hash_join_benchmark.cc index ad1bd67cc8ec7..1f8e02e9f0fcf 100644 --- a/cpp/src/arrow/acero/hash_join_benchmark.cc +++ b/cpp/src/arrow/acero/hash_join_benchmark.cc @@ -148,7 +148,7 @@ class JoinBenchmark { }; scheduler_ = TaskScheduler::Make(); - DCHECK_OK(ctx_.Init(settings.num_threads, nullptr)); + DCHECK_OK(ctx_.Init(nullptr)); auto register_task_group_callback = [&](std::function task, std::function cont) { diff --git a/cpp/src/arrow/acero/hash_join_node.cc b/cpp/src/arrow/acero/hash_join_node.cc index b49364300dac8..67f902e64be93 100644 --- a/cpp/src/arrow/acero/hash_join_node.cc +++ b/cpp/src/arrow/acero/hash_join_node.cc @@ -351,7 +351,7 @@ Result HashJoinSchema::BindFilter(Expression filter, const Schema& right_schema, ExecContext* exec_context) { if (filter.IsBound() || filter == literal(true)) { - return std::move(filter); + return filter; } // Step 1: Construct filter schema FieldVector fields; @@ -386,7 +386,7 @@ Result HashJoinSchema::BindFilter(Expression filter, filter.ToString(), " evaluates to ", filter.type()->ToString()); } - return std::move(filter); + return filter; } Expression HashJoinSchema::RewriteFilterToUseFilterSchema( @@ -497,11 +497,11 @@ struct BloomFilterPushdownContext { using BuildFinishedCallback = std::function; using FiltersReceivedCallback = std::function; using FilterFinishedCallback = std::function; - void Init(HashJoinNode* owner, size_t num_threads, - RegisterTaskGroupCallback register_task_group_callback, - StartTaskGroupCallback start_task_group_callback, - FiltersReceivedCallback on_bloom_filters_received, bool disable_bloom_filter, - bool use_sync_execution); + Status Init(HashJoinNode* owner, size_t num_threads, + RegisterTaskGroupCallback register_task_group_callback, + StartTaskGroupCallback start_task_group_callback, + FiltersReceivedCallback on_bloom_filters_received, + bool disable_bloom_filter, bool use_sync_execution); Status StartProducing(size_t thread_index); @@ -559,8 +559,7 @@ struct BloomFilterPushdownContext { std::vector hashes(batch.length); std::vector bv(bit_vector_bytes); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * stack, - ctx_->GetTempStack(thread_index)); + arrow::util::TempVectorStack* stack = &tld_[thread_index].stack; // Start with full selection for the current batch memset(selected.data(), 0xff, bit_vector_bytes); @@ -654,7 +653,17 @@ struct BloomFilterPushdownContext { FiltersReceivedCallback all_received_callback_; FilterFinishedCallback on_finished_; } eval_; + + static constexpr auto kTempStackUsage = + Hashing32::kHashBatchTempStackUsage + + (sizeof(uint32_t) + /*extra=*/1) * arrow::util::MiniBatch::kMiniBatchLength; + + struct ThreadLocalData { + arrow::util::TempVectorStack stack; + }; + std::vector tld_; }; + bool HashJoinSchema::HasDictionaries() const { for (int side = 0; side <= 1; ++side) { for (int icol = 0; icol < proj_maps[side].num_cols(HashJoinProjection::INPUT); @@ -930,7 +939,7 @@ class HashJoinNode : public ExecNode, public TracedNode { // we will change it back to just the CPU's thread pool capacity. 
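The hash-join hunks above and below replace `QueryContext::GetTempStack()` with a `TempVectorStack` owned by each thread's local state and sized once at `Init()`; the `ManyJoins` test later in this diff guards against the overflows (GH-41335) that a shared, lazily-initialized stack allowed. A self-contained sketch of the ownership shape, using a stand-in stack type (the real class is `arrow::util::TempVectorStack`):

```cpp
#include <cstdint>
#include <vector>

// Stand-in so the sketch compiles on its own.
struct TempVectorStack {
  void Init(int64_t bytes) { buffer_.resize(static_cast<size_t>(bytes)); }
  std::vector<uint8_t> buffer_;
};

// Mirrors the ThreadLocalData/tld_ members introduced in the diff: a fixed,
// per-thread scratch budget allocated up front instead of borrowed from the
// shared QueryContext on first use.
struct ThreadLocalData {
  TempVectorStack stack;
};

void InitStacks(std::vector<ThreadLocalData>& tld, size_t num_threads,
                int64_t per_thread_bytes) {
  tld.resize(num_threads);
  for (auto& local : tld) {
    local.stack.Init(per_thread_bytes);  // no sharing, no lazy init
  }
}
```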
size_t num_threads = (GetCpuThreadPoolCapacity() + io::GetIOThreadPoolCapacity() + 1); - pushdown_context_.Init( + RETURN_NOT_OK(pushdown_context_.Init( this, num_threads, [ctx](std::function fn, std::function on_finished) { @@ -940,7 +949,7 @@ class HashJoinNode : public ExecNode, public TracedNode { return ctx->StartTaskGroup(task_group_id, num_tasks); }, [this](size_t thread_index) { return OnFiltersReceived(thread_index); }, - disable_bloom_filter_, use_sync_execution); + disable_bloom_filter_, use_sync_execution)); RETURN_NOT_OK(impl_->Init( ctx, join_type_, num_threads, &(schema_mgr_->proj_maps[0]), @@ -1037,7 +1046,7 @@ class HashJoinNode : public ExecNode, public TracedNode { BloomFilterPushdownContext pushdown_context_; }; -void BloomFilterPushdownContext::Init( +Status BloomFilterPushdownContext::Init( HashJoinNode* owner, size_t num_threads, RegisterTaskGroupCallback register_task_group_callback, StartTaskGroupCallback start_task_group_callback, @@ -1074,6 +1083,12 @@ void BloomFilterPushdownContext::Init( return eval_.on_finished_(thread_index, std::move(eval_.batches_)); }); start_task_group_callback_ = std::move(start_task_group_callback); + tld_.resize(num_threads); + for (auto& local_data : tld_) { + RETURN_NOT_OK(local_data.stack.Init(ctx_->memory_pool(), kTempStackUsage)); + } + + return Status::OK(); } Status BloomFilterPushdownContext::StartProducing(size_t thread_index) { @@ -1124,8 +1139,7 @@ Status BloomFilterPushdownContext::BuildBloomFilter_exec_task(size_t thread_inde } ARROW_ASSIGN_OR_RAISE(ExecBatch key_batch, ExecBatch::Make(std::move(key_columns))); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * stack, - ctx_->GetTempStack(thread_index)); + arrow::util::TempVectorStack* stack = &tld_[thread_index].stack; arrow::util::TempVectorHolder hash_holder( stack, arrow::util::MiniBatch::kMiniBatchLength); uint32_t* hashes = hash_holder.mutable_data(); diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc index 9c3dbc176ff4f..215b1e4d21125 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -28,6 +28,7 @@ #include "arrow/api.h" #include "arrow/compute/kernels/row_encoder_internal.h" #include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/testing/extension_type.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" @@ -41,6 +42,7 @@ namespace arrow { using compute::call; using compute::default_exec_context; +using compute::ExecBatchBuilder; using compute::ExecSpan; using compute::field_ref; using compute::SortIndices; @@ -3201,5 +3203,55 @@ TEST(HashJoin, ChainedIntegerHashJoins) { } } +// Test that a large number of joins don't overflow the temp vector stack, like GH-39582 +// and GH-39951. +TEST(HashJoin, ManyJoins) { + // The idea of this case is to create many nested join nodes that may possibly cause + // recursive usage of temp vector stack. To make sure that the recursion happens: + // 1. A left-deep join tree is created so that the left-most (the final probe side) + // table will go through all the hash tables from the right side. + // 2. Left-outer join is used so that every join will increase the cardinality. + // 3. The left-most table contains rows of unique integers from 0 to N. + // 4. Each right table at level i contains two rows of integer i, so that the probing of + // each level will increase the result by one row. + // 5. 
The left-most table is a single batch of enough rows, so that at each level, the + // probing will accumulate enough result rows to have to output to the subsequent level + // before finishing the current batch (releasing the buffer allocated on the temp vector + // stack), which is essentially the recursive usage of the temp vector stack. + + // A fair number of joins to guarantee temp vector stack overflow before GH-41335. + const int num_joins = 64; + + // `ExecBatchBuilder::num_rows_max()` is the number of rows for swiss join to accumulate + // before outputting. + const int num_left_rows = ExecBatchBuilder::num_rows_max(); + ASSERT_OK_AND_ASSIGN( + auto left_batches, + MakeIntegerBatches({[](int row_id) -> int64_t { return row_id; }}, + schema({field("l_key", int32())}), + /*num_batches=*/1, /*batch_size=*/num_left_rows)); + Declaration root{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(left_batches.schema), + std::move(left_batches.batches))}; + + HashJoinNodeOptions join_opts(JoinType::LEFT_OUTER, /*left_keys=*/{"l_key"}, + /*right_keys=*/{"r_key"}); + + for (int i = 0; i < num_joins; ++i) { + ASSERT_OK_AND_ASSIGN(auto right_batches, + MakeIntegerBatches({[i](int) -> int64_t { return i; }}, + schema({field("r_key", int32())}), + /*num_batches=*/1, /*batch_size=*/2)); + Declaration table{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(right_batches.schema), + std::move(right_batches.batches))}; + + Declaration new_root{"hashjoin", {std::move(root), std::move(table)}, join_opts}; + root = std::move(new_root); + } + + ASSERT_OK_AND_ASSIGN(std::ignore, DeclarationToTable(std::move(root))); +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/order_by_impl.cc b/cpp/src/arrow/acero/order_by_impl.cc index 2c624f6ab895f..1165799fc6610 100644 --- a/cpp/src/arrow/acero/order_by_impl.cc +++ b/cpp/src/arrow/acero/order_by_impl.cc @@ -93,14 +93,14 @@ Result> OrderByImpl::MakeSort( ExecContext* ctx, const std::shared_ptr& output_schema, const SortOptions& options) { std::unique_ptr impl{new SortBasicImpl(ctx, output_schema, options)}; - return std::move(impl); + return impl; } Result> OrderByImpl::MakeSelectK( ExecContext* ctx, const std::shared_ptr& output_schema, const SelectKOptions& options) { std::unique_ptr impl{new SelectKBasicImpl(ctx, output_schema, options)}; - return std::move(impl); + return impl; } } // namespace acero diff --git a/cpp/src/arrow/acero/query_context.cc b/cpp/src/arrow/acero/query_context.cc index a27397d12079d..18beb19ab7f8b 100644 --- a/cpp/src/arrow/acero/query_context.cc +++ b/cpp/src/arrow/acero/query_context.cc @@ -40,8 +40,7 @@ QueryContext::QueryContext(QueryOptions opts, ExecContext exec_context) const CpuInfo* QueryContext::cpu_info() const { return CpuInfo::GetInstance(); } int64_t QueryContext::hardware_flags() const { return cpu_info()->hardware_flags(); } -Status QueryContext::Init(size_t max_num_threads, util::AsyncTaskScheduler* scheduler) { - tld_.resize(max_num_threads); +Status QueryContext::Init(util::AsyncTaskScheduler* scheduler) { async_scheduler_ = scheduler; return Status::OK(); } @@ -50,15 +49,6 @@ size_t QueryContext::GetThreadIndex() { return thread_indexer_(); } size_t QueryContext::max_concurrency() const { return thread_indexer_.Capacity(); } -Result QueryContext::GetTempStack(size_t thread_index) { - if (!tld_[thread_index].is_init) { - RETURN_NOT_OK(tld_[thread_index].stack.Init( - memory_pool(), 32 * util::MiniBatch::kMiniBatchLength * sizeof(uint64_t))); - tld_[thread_index].is_init 
= true; - } - return &tld_[thread_index].stack; -} - Result> QueryContext::BeginExternalTask(std::string_view name) { Future<> completion_future = Future<>::Make(); if (async_scheduler_->AddSimpleTask([completion_future] { return completion_future; }, diff --git a/cpp/src/arrow/acero/query_context.h b/cpp/src/arrow/acero/query_context.h index 9ea11679cba05..3eff299439828 100644 --- a/cpp/src/arrow/acero/query_context.h +++ b/cpp/src/arrow/acero/query_context.h @@ -38,7 +38,7 @@ class ARROW_ACERO_EXPORT QueryContext { QueryContext(QueryOptions opts = {}, ExecContext exec_context = *default_exec_context()); - Status Init(size_t max_num_threads, arrow::util::AsyncTaskScheduler* scheduler); + Status Init(arrow::util::AsyncTaskScheduler* scheduler); const ::arrow::internal::CpuInfo* cpu_info() const; int64_t hardware_flags() const; @@ -52,7 +52,6 @@ class ARROW_ACERO_EXPORT QueryContext { size_t GetThreadIndex(); size_t max_concurrency() const; - Result GetTempStack(size_t thread_index); /// \brief Start an external task /// @@ -145,11 +144,6 @@ class ARROW_ACERO_EXPORT QueryContext { std::unique_ptr task_scheduler_ = TaskScheduler::Make(); ThreadIndexer thread_indexer_; - struct ThreadLocalData { - bool is_init = false; - arrow::util::TempVectorStack stack; - }; - std::vector tld_; std::atomic in_flight_bytes_to_disk_{0}; }; diff --git a/cpp/src/arrow/acero/sink_node.cc b/cpp/src/arrow/acero/sink_node.cc index 4ab6b4537de02..66f447aa87f11 100644 --- a/cpp/src/arrow/acero/sink_node.cc +++ b/cpp/src/arrow/acero/sink_node.cc @@ -423,6 +423,7 @@ class ConsumingSinkNode : public ExecNode, std::atomic backpressure_counter_ = 0; std::unique_ptr sequencer_; }; + static Result MakeTableConsumingSinkNode(ExecPlan* plan, std::vector inputs, const ExecNodeOptions& options) { diff --git a/cpp/src/arrow/acero/sorted_merge_node.cc b/cpp/src/arrow/acero/sorted_merge_node.cc index 4d4565a6bb5e7..a71ac79efcc46 100644 --- a/cpp/src/arrow/acero/sorted_merge_node.cc +++ b/cpp/src/arrow/acero/sorted_merge_node.cc @@ -262,19 +262,22 @@ class SortedMergeNode : public ExecNode { : ExecNode(plan, inputs, GetInputLabels(inputs), std::move(output_schema)), ordering_(std::move(new_ordering)), input_counter(inputs_.size()), - output_counter(inputs_.size()), - process_thread() { + output_counter(inputs_.size()) +#ifdef ARROW_ENABLE_THREADING + , + process_thread() +#endif + { SetLabel("sorted_merge"); } ~SortedMergeNode() override { - process_queue.Push( - kPoisonPill); // poison pill - // We might create a temporary (such as to inspect the output - // schema), in which case there isn't anything to join + PushTask(kPoisonPill); +#ifdef ARROW_ENABLE_THREADING if (process_thread.joinable()) { process_thread.join(); } +#endif } static arrow::Result Make( @@ -355,10 +358,25 @@ class SortedMergeNode : public ExecNode { // InputState's ConcurrentQueue manages locking input_counter[index] += rb->num_rows(); ARROW_RETURN_NOT_OK(state[index]->Push(rb)); - process_queue.Push(kNewTask); + PushTask(kNewTask); return Status::OK(); } + void PushTask(bool ok) { +#ifdef ARROW_ENABLE_THREADING + process_queue.Push(ok); +#else + if (process_task.is_finished()) { + return; + } + if (ok == kNewTask) { + PollOnce(); + } else { + EndFromProcessThread(); + } +#endif + } + arrow::Status InputFinished(arrow::acero::ExecNode* input, int total_batches) override { ARROW_DCHECK(std_has(inputs_, input)); { @@ -368,7 +386,8 @@ class SortedMergeNode : public ExecNode { state.at(k)->set_total_batches(total_batches); } // Trigger a final process call for 
stragglers - process_queue.Push(kNewTask); + PushTask(kNewTask); + return Status::OK(); } @@ -379,13 +398,17 @@ class SortedMergeNode : public ExecNode { // Plan has already aborted. Do not start process thread return Status::OK(); } +#ifdef ARROW_ENABLE_THREADING process_thread = std::thread(&SortedMergeNode::StartPoller, this); +#endif return Status::OK(); } arrow::Status StopProducingImpl() override { +#ifdef ARROW_ENABLE_THREADING process_queue.Clear(); - process_queue.Push(kPoisonPill); +#endif + PushTask(kPoisonPill); return Status::OK(); } @@ -408,6 +431,7 @@ class SortedMergeNode : public ExecNode { << input_counter[i] << " != " << output_counter[i]; } +#ifdef ARROW_ENABLE_THREADING ARROW_UNUSED( plan_->query_context()->executor()->Spawn([this, st = std::move(st)]() mutable { Defer cleanup([this, &st]() { process_task.MarkFinished(st); }); @@ -415,6 +439,12 @@ class SortedMergeNode : public ExecNode { st = output_->InputFinished(this, batches_produced); } })); +#else + process_task.MarkFinished(st); + if (st.ok()) { + st = output_->InputFinished(this, batches_produced); + } +#endif } bool CheckEnded() { @@ -552,6 +582,7 @@ class SortedMergeNode : public ExecNode { return true; } +#ifdef ARROW_ENABLE_THREADING void EmitBatches() { while (true) { // Implementation note: If the queue is empty, we will block here @@ -567,6 +598,7 @@ class SortedMergeNode : public ExecNode { /// The entry point for processThread static void StartPoller(SortedMergeNode* node) { node->EmitBatches(); } +#endif arrow::Ordering ordering_; @@ -583,11 +615,13 @@ class SortedMergeNode : public ExecNode { std::atomic batches_produced{0}; +#ifdef ARROW_ENABLE_THREADING // Queue to trigger processing of a given input. False acts as a poison pill ConcurrentQueue process_queue; // Once StartProducing is called, we initialize this thread to poll the // input states and emit batches std::thread process_thread; +#endif arrow::Future<> process_task; // Map arg index --> completion counter diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index 542e943c4a82b..732deb72861d6 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ -2470,6 +2470,8 @@ Status JoinProbeProcessor::OnFinished() { class SwissJoin : public HashJoinImpl { public: + static constexpr auto kTempStackUsage = 64 * arrow::util::MiniBatch::kMiniBatchLength; + Status Init(QueryContext* ctx, JoinType join_type, size_t num_threads, const HashJoinProjectionMaps* proj_map_left, const HashJoinProjectionMaps* proj_map_right, @@ -2513,6 +2515,7 @@ class SwissJoin : public HashJoinImpl { local_states_.resize(num_threads_); for (int i = 0; i < num_threads_; ++i) { + RETURN_NOT_OK(local_states_[i].stack.Init(pool_, kTempStackUsage)); local_states_[i].hash_table_ready = false; local_states_[i].num_output_batches = 0; local_states_[i].materialize.Init(pool_, proj_map_left, proj_map_right); @@ -2566,8 +2569,7 @@ class SwissJoin : public HashJoinImpl { ExecBatch keypayload_batch; ARROW_ASSIGN_OR_RAISE(keypayload_batch, KeyPayloadFromInput(/*side=*/0, &batch)); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_index)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_index].stack; return CancelIfNotOK( probe_processor_.OnNextBatch(thread_index, keypayload_batch, temp_stack, @@ -2679,8 +2681,7 @@ class SwissJoin : public HashJoinImpl { input_batch.values[schema->num_cols(HashJoinProjection::KEY) + icol]; } } - 
ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_id)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; RETURN_NOT_OK(CancelIfNotOK(hash_table_build_.PushNextBatch( static_cast(thread_id), key_batch, no_payload ? nullptr : &payload_batch, temp_stack))); @@ -2715,8 +2716,7 @@ class SwissJoin : public HashJoinImpl { Status MergeFinished(size_t thread_id) { RETURN_NOT_OK(status()); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_id)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; hash_table_build_.FinishPrtnMerge(temp_stack); return CancelIfNotOK(OnBuildHashTableFinished(static_cast(thread_id))); } @@ -2771,8 +2771,7 @@ class SwissJoin : public HashJoinImpl { std::min((task_id + 1) * kNumRowsPerScanTask, hash_table_.num_rows()); // Get thread index and related temp vector stack // - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_id)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; // Split into mini-batches // @@ -2949,6 +2948,7 @@ class SwissJoin : public HashJoinImpl { FinishedCallback finished_callback_; struct ThreadLocalState { + arrow::util::TempVectorStack stack; JoinResultMaterialize materialize; std::vector temp_column_arrays; int64_t num_output_batches; @@ -2985,7 +2985,7 @@ class SwissJoin : public HashJoinImpl { Result> HashJoinImpl::MakeSwiss() { std::unique_ptr impl{new SwissJoin()}; - return std::move(impl); + return impl; } } // namespace acero diff --git a/cpp/src/arrow/acero/task_util.cc b/cpp/src/arrow/acero/task_util.cc index 4d8e9ecf76597..85378eaeeb27c 100644 --- a/cpp/src/arrow/acero/task_util.cc +++ b/cpp/src/arrow/acero/task_util.cc @@ -424,7 +424,7 @@ void TaskSchedulerImpl::Abort(AbortContinuationImpl impl) { std::unique_ptr TaskScheduler::Make() { std::unique_ptr impl{new TaskSchedulerImpl()}; - return std::move(impl); + return impl; } } // namespace acero diff --git a/cpp/src/arrow/acero/tpch_node.cc b/cpp/src/arrow/acero/tpch_node.cc index 9797a082b49d2..137b62ad38a95 100644 --- a/cpp/src/arrow/acero/tpch_node.cc +++ b/cpp/src/arrow/acero/tpch_node.cc @@ -336,7 +336,7 @@ Result TpchPseudotext::GenerateComments(size_t num_comments, size_t min_l } ArrayData ad(utf8(), num_comments, {nullptr, std::move(offset_buffer), std::move(comment_buffer)}); - return std::move(ad); + return ad; } bool TpchPseudotext::GenerateWord(int64_t& offset, random::pcg32_fast& rng, char* arr, @@ -611,7 +611,7 @@ Result RandomVString(random::pcg32_fast& rng, int64_t num_rows, int32_t m for (int32_t i = 0; i < offsets[num_rows]; i++) str[i] = alpha_numerics[char_dist(rng)]; ArrayData ad(utf8(), num_rows, {nullptr, std::move(offset_buff), std::move(str_buff)}); - return std::move(ad); + return ad; } void GeneratePhoneNumber(char* out, random::pcg32_fast& rng, int32_t country) { @@ -677,7 +677,7 @@ class PartAndPartSupplierGenerator { if (!part_output_queue_.empty()) { ExecBatch batch = std::move(part_output_queue_.front()); part_output_queue_.pop(); - return std::move(batch); + return batch; } else if (part_rows_generated_ == part_rows_to_generate_) { return std::nullopt; } else { @@ -732,7 +732,7 @@ class PartAndPartSupplierGenerator { if (!partsupp_output_queue_.empty()) { ExecBatch result = std::move(partsupp_output_queue_.front()); partsupp_output_queue_.pop(); - return std::move(result); + return result; } } { @@ -1337,7 +1337,7 @@ class 
OrdersAndLineItemGenerator { if (!orders_output_queue_.empty()) { ExecBatch batch = std::move(orders_output_queue_.front()); orders_output_queue_.pop(); - return std::move(batch); + return batch; } else if (orders_rows_generated_ == orders_rows_to_generate_) { return std::nullopt; } else { @@ -1401,12 +1401,12 @@ class OrdersAndLineItemGenerator { if (from_queue) { ARROW_DCHECK(queued.length <= batch_size_); tld.first_batch_offset = queued.length; - if (queued.length == batch_size_) return std::move(queued); + if (queued.length == batch_size_) return queued; } { std::lock_guard lock(orders_output_queue_mutex_); if (orders_rows_generated_ == orders_rows_to_generate_) { - if (from_queue) return std::move(queued); + if (from_queue) return queued; return std::nullopt; } diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index 98784450b3cce..25759f8471365 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -566,7 +566,7 @@ Result> ORCFileReader::Open( #endif auto result = std::unique_ptr(new ORCFileReader()); RETURN_NOT_OK(result->impl_->Open(file, pool)); - return std::move(result); + return result; } Result> ORCFileReader::ReadMetadata() { @@ -837,7 +837,7 @@ Result> ORCFileWriter::Open( std::unique_ptr(new ORCFileWriter()); Status status = result->impl_->Open(output_stream, writer_options); RETURN_NOT_OK(status); - return std::move(result); + return result; } Status ORCFileWriter::Write(const Table& table) { return impl_->Write(table); } diff --git a/cpp/src/arrow/adapters/orc/util.cc b/cpp/src/arrow/adapters/orc/util.cc index 2a74bec1aa6fd..5bfe257ac7bad 100644 --- a/cpp/src/arrow/adapters/orc/util.cc +++ b/cpp/src/arrow/adapters/orc/util.cc @@ -1026,7 +1026,7 @@ Result> GetOrcType(const DataType& type) { SetAttributes(*it, orc_subtype.get()); out_type->addStructField(field_name, std::move(orc_subtype)); } - return std::move(out_type); + return out_type; } case Type::type::MAP: { const auto& key_field = checked_cast(type).key_field(); @@ -1048,7 +1048,7 @@ Result> GetOrcType(const DataType& type) { SetAttributes(arrow_field, orc_subtype.get()); out_type->addUnionChild(std::move(orc_subtype)); } - return std::move(out_type); + return out_type; } default: { return Status::NotImplemented("Unknown or unsupported Arrow type: ", @@ -1195,7 +1195,7 @@ Result> GetOrcType(const Schema& schema) { SetAttributes(field, orc_subtype.get()); out_type->addStructField(field->name(), std::move(orc_subtype)); } - return std::move(out_type); + return out_type; } Result> GetFieldMetadata( diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 6411aebf80442..716ae0722069e 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -224,6 +224,14 @@ class ARROW_EXPORT Array { /// \return Status Status ValidateFull() const; + /// \brief Return the device_type that this array's data is allocated on + /// + /// This just delegates to calling device_type on the underlying ArrayData + /// object which backs this Array. 
+ /// + /// \return DeviceAllocationType + DeviceAllocationType device_type() const { return data_->device_type(); } + protected: Array() = default; ARROW_DEFAULT_MOVE_AND_ASSIGN(Array); diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc index 18afcc90d71f8..063b68706b313 100644 --- a/cpp/src/arrow/array/array_list_test.cc +++ b/cpp/src/arrow/array/array_list_test.cc @@ -1287,7 +1287,7 @@ TEST_F(TestMapArray, ValidateErrorNullKey) { } TEST_F(TestMapArray, FromArrays) { - std::shared_ptr offsets1, offsets2, offsets3, offsets4, keys, items; + std::shared_ptr offsets1, offsets2, offsets3, offsets4, offsets5, keys, items; std::vector offsets_is_valid3 = {true, false, true, true}; std::vector offsets_is_valid4 = {true, true, false, true}; @@ -1342,6 +1342,20 @@ TEST_F(TestMapArray, FromArrays) { // Zero-length offsets ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets1->Slice(0, 0), keys, items, pool_)); + // Offseted offsets + ASSERT_OK_AND_ASSIGN(auto map5, + MapArray::FromArrays(offsets1->Slice(1), keys, items, pool_)); + ASSERT_OK(map5->Validate()); + + AssertArraysEqual(*expected1.Slice(1), *map5); + + std::vector offset5_values = {2, 2, 6}; + ArrayFromVector(offset5_values, &offsets5); + ASSERT_OK_AND_ASSIGN(auto map6, MapArray::FromArrays(offsets5, keys, items, pool_)); + ASSERT_OK(map6->Validate()); + + AssertArraysEqual(*map5, *map6); + // Offsets not the right type ASSERT_RAISES(TypeError, MapArray::FromArrays(keys, offsets1, items, pool_)); @@ -1354,6 +1368,23 @@ TEST_F(TestMapArray, FromArrays) { ASSERT_EQ(keys_with_null->length(), tmp_items->length()); ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets1, keys_with_null, tmp_items, pool_)); + + // With null_bitmap + ASSERT_OK_AND_ASSIGN(auto map7, MapArray::FromArrays(offsets1, keys, items, pool_, + offsets3->data()->buffers[0])); + ASSERT_OK(map7->Validate()); + MapArray expected7(map_type, length, offsets1->data()->buffers[1], keys, items, + offsets3->data()->buffers[0], 1); + AssertArraysEqual(expected7, *map7); + + // Null bitmap and offset with null + ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets3, keys, items, pool_, + offsets3->data()->buffers[0])); + + // Null bitmap and offset with offset + ASSERT_RAISES(NotImplemented, + MapArray::FromArrays(offsets1->Slice(2), keys, items, pool_, + offsets3->data()->buffers[0])); } TEST_F(TestMapArray, FromArraysEquality) { diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index 24e0dfb7081ac..2f6bca3d571ed 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -115,7 +115,7 @@ Result::ArrayType>> ListArrayFromArray return Status::TypeError("List offsets must be ", OffsetArrowType::type_name()); } - if (null_bitmap != nullptr && offsets.null_count() > 0) { + if (null_bitmap != nullptr && offsets.data()->MayHaveNulls()) { return Status::Invalid( "Ambiguous to specify both validity map and offsets with nulls"); } @@ -790,7 +790,7 @@ MapArray::MapArray(const std::shared_ptr& type, int64_t length, const std::shared_ptr& items, int64_t null_count, int64_t offset) { auto pair_data = ArrayData::Make(type->fields()[0]->type(), keys->data()->length, - {nullptr}, {keys->data(), items->data()}, 0, offset); + {nullptr}, {keys->data(), items->data()}, 0); auto map_data = ArrayData::Make(type, length, std::move(buffers), {pair_data}, null_count, offset); SetData(map_data); @@ -807,7 +807,7 @@ MapArray::MapArray(const std::shared_ptr& type, int64_t length, Result> 
MapArray::FromArraysInternal( std::shared_ptr type, const std::shared_ptr& offsets, const std::shared_ptr& keys, const std::shared_ptr& items, - MemoryPool* pool) { + MemoryPool* pool, const std::shared_ptr& null_bitmap) { using offset_type = typename MapType::offset_type; using OffsetArrowType = typename CTypeTraits::ArrowType; @@ -827,6 +827,15 @@ Result> MapArray::FromArraysInternal( return Status::Invalid("Map key and item arrays must be equal length"); } + if (null_bitmap != nullptr && offsets->data()->MayHaveNulls()) { + return Status::Invalid( + "Ambiguous to specify both validity map and offsets with nulls"); + } + + if (null_bitmap != nullptr && offsets->offset() != 0) { + return Status::NotImplemented("Null bitmap with offsets slice not supported."); + } + if (offsets->null_count() > 0) { ARROW_ASSIGN_OR_RAISE(auto buffers, CleanListOffsets(NULLPTR, *offsets, pool)); @@ -836,24 +845,32 @@ Result> MapArray::FromArraysInternal( using OffsetArrayType = typename TypeTraits::ArrayType; const auto& typed_offsets = checked_cast(*offsets); - auto buffers = BufferVector({nullptr, typed_offsets.values()}); + + BufferVector buffers; + int64_t null_count; + if (null_bitmap != nullptr) { + buffers = BufferVector({std::move(null_bitmap), typed_offsets.values()}); + null_count = null_bitmap->size(); + } else { + buffers = BufferVector({null_bitmap, typed_offsets.values()}); + null_count = 0; + } return std::make_shared(type, offsets->length() - 1, std::move(buffers), keys, - items, /*null_count=*/0, offsets->offset()); + items, /*null_count=*/null_count, offsets->offset()); } -Result> MapArray::FromArrays(const std::shared_ptr& offsets, - const std::shared_ptr& keys, - const std::shared_ptr& items, - MemoryPool* pool) { +Result> MapArray::FromArrays( + const std::shared_ptr& offsets, const std::shared_ptr& keys, + const std::shared_ptr& items, MemoryPool* pool, + const std::shared_ptr& null_bitmap) { return FromArraysInternal(std::make_shared(keys->type(), items->type()), - offsets, keys, items, pool); + offsets, keys, items, pool, null_bitmap); } -Result> MapArray::FromArrays(std::shared_ptr type, - const std::shared_ptr& offsets, - const std::shared_ptr& keys, - const std::shared_ptr& items, - MemoryPool* pool) { +Result> MapArray::FromArrays( + std::shared_ptr type, const std::shared_ptr& offsets, + const std::shared_ptr& keys, const std::shared_ptr& items, + MemoryPool* pool, const std::shared_ptr& null_bitmap) { if (type->id() != Type::MAP) { return Status::TypeError("Expected map type, got ", type->ToString()); } @@ -864,7 +881,7 @@ Result> MapArray::FromArrays(std::shared_ptr ty if (!map_type.item_type()->Equals(items->type())) { return Status::TypeError("Mismatching map items type"); } - return FromArraysInternal(std::move(type), offsets, keys, items, pool); + return FromArraysInternal(std::move(type), offsets, keys, items, pool, null_bitmap); } Status MapArray::ValidateChildData( @@ -876,13 +893,13 @@ Status MapArray::ValidateChildData( if (pair_data->type->id() != Type::STRUCT) { return Status::Invalid("Map array child array should have struct type"); } - if (pair_data->null_count != 0) { + if (pair_data->MayHaveNulls()) { return Status::Invalid("Map array child array should have no nulls"); } if (pair_data->child_data.size() != 2) { return Status::Invalid("Map array child array should have two fields"); } - if (pair_data->child_data[0]->null_count != 0) { + if (pair_data->child_data[0]->MayHaveNulls()) { return Status::Invalid("Map array keys array should have no nulls"); } return 
diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h
index 5744f5fcadf05..f96b6bd3b1346 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -532,15 +532,18 @@ class ARROW_EXPORT MapArray : public ListArray {
   /// \param[in] keys Array containing key values
   /// \param[in] items Array containing item values
   /// \param[in] pool MemoryPool in case new offsets array needs to be
   /// allocated because of null values
+  /// \param[in] null_bitmap Optional validity bitmap
   static Result<std::shared_ptr<Array>> FromArrays(
       const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
-      const std::shared_ptr<Array>& items, MemoryPool* pool = default_memory_pool());
+      const std::shared_ptr<Array>& items, MemoryPool* pool = default_memory_pool(),
+      const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
 
   static Result<std::shared_ptr<Array>> FromArrays(
       std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
       const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
-      MemoryPool* pool = default_memory_pool());
+      MemoryPool* pool = default_memory_pool(),
+      const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
 
   const MapType* map_type() const { return map_type_; }
 
@@ -560,7 +563,7 @@ class ARROW_EXPORT MapArray : public ListArray {
   static Result<std::shared_ptr<Array>> FromArraysInternal(
       std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
       const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
-      MemoryPool* pool);
+      MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
 
  private:
   const MapType* map_type_;
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index af64908b59582..32806d9d2edb3 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -478,6 +478,7 @@ TEST_F(TestArray, TestMakeArrayOfNull) {
     ASSERT_EQ(array->type(), type);
     ASSERT_OK(array->ValidateFull());
     ASSERT_EQ(array->length(), length);
+    ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU);
     if (is_union(type->id())) {
       ASSERT_EQ(array->null_count(), 0);
       ASSERT_EQ(array->ComputeLogicalNullCount(), length);
@@ -719,6 +720,7 @@ TEST_F(TestArray, TestMakeArrayFromScalar) {
     ASSERT_OK(array->ValidateFull());
     ASSERT_EQ(array->length(), length);
     ASSERT_EQ(array->null_count(), 0);
+    ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU);
 
     // test case for ARROW-13321
     for (int64_t i : {int64_t{0}, length / 2, length - 1}) {
@@ -744,6 +746,7 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) {
     auto sliced = array->Slice(1, 4);
     ASSERT_EQ(sliced->length(), 4);
     ASSERT_EQ(sliced->null_count(), 0);
+    ASSERT_EQ(sliced->device_type(), DeviceAllocationType::kCPU);
     ARROW_EXPECT_OK(sliced->ValidateFull());
   }
 }
@@ -758,6 +761,7 @@ TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) {
   ASSERT_OK(array->ValidateFull());
   ASSERT_EQ(array->length(), 4);
   ASSERT_EQ(array->null_count(), 0);
+  ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU);
 
   for (int i = 0; i < 4; i++) {
     ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i));
@@ -797,6 +801,7 @@ TEST_F(TestArray, TestMakeEmptyArray) {
     ASSERT_OK_AND_ASSIGN(auto array, MakeEmptyArray(type));
     ASSERT_OK(array->ValidateFull());
     ASSERT_EQ(array->length(), 0);
+    CheckSpanRoundTrip(*array);
   }
 }
 
@@ -827,6 +832,9 @@ TEST_F(TestArray, TestFillFromScalar) {
 
 // GH-40069: Data-race when concurrent calling ArraySpan::FillFromScalar of the same
 // scalar instance.
 TEST_F(TestArray, TestConcurrentFillFromScalar) {
+#ifndef ARROW_ENABLE_THREADING
+  GTEST_SKIP() << "Test requires threading support";
+#endif
   for (auto type : TestArrayUtilitiesAgainstTheseTypes()) {
     ARROW_SCOPED_TRACE("type = ", type->ToString());
     for (auto seed : {0u, 0xdeadbeef, 42u}) {
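The assertions added above exercise the new Array::device_type() accessor. A short sketch of what callers can now rely on (function name invented; ordinary pool-allocated data assumed):

    #include <arrow/api.h>

    arrow::Status CheckCpuResidency() {
      arrow::Int64Builder builder;
      ARROW_RETURN_NOT_OK(builder.AppendValues({1, 2, 3}));
      ARROW_ASSIGN_OR_RAISE(auto array, builder.Finish());

      // Pool-allocated data always reports kCPU; other values can only come
      // from buffers imported from or allocated on another device.
      if (array->device_type() != arrow::DeviceAllocationType::kCPU) {
        return arrow::Status::Invalid("unexpected device");
      }
      // A slice shares the same buffers and therefore the same device.
      if (array->Slice(1)->device_type() != arrow::DeviceAllocationType::kCPU) {
        return arrow::Status::Invalid("unexpected device for slice");
      }
      return arrow::Status::OK();
    }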
diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h
index e6c0b2d2387f2..ecd2136f5d20b 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -175,9 +175,9 @@ class ARROW_EXPORT ArrayBuilder {
   /// \brief Append a range of values from an array.
   ///
   /// The given array must be the same type as the builder.
-  virtual Status AppendArraySlice([[maybe_unused]] const ArraySpan& array,
-                                  [[maybe_unused]] int64_t offset,
-                                  [[maybe_unused]] int64_t length) {
+  virtual Status AppendArraySlice(const ArraySpan& ARROW_ARG_UNUSED(array),
+                                  int64_t ARROW_ARG_UNUSED(offset),
+                                  int64_t ARROW_ARG_UNUSED(length)) {
     return Status::NotImplemented("AppendArraySlice for builder for ", *type());
   }
 
@@ -332,7 +332,7 @@ inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilder(
     const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
   std::unique_ptr<ArrayBuilder> out;
   ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &out));
-  return std::move(out);
+  return out;
 }
 
 /// \brief Construct an empty ArrayBuilder corresponding to the data
@@ -346,7 +346,7 @@ inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilderExactIndex(
     const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
   std::unique_ptr<ArrayBuilder> out;
   ARROW_RETURN_NOT_OK(MakeBuilderExactIndex(pool, type, &out));
-  return std::move(out);
+  return out;
 }
 
 /// \brief Construct an empty DictionaryBuilder initialized optionally
@@ -365,7 +365,7 @@ inline Result<std::unique_ptr<ArrayBuilder>> MakeDictionaryBuilder(
     MemoryPool* pool = default_memory_pool()) {
   std::unique_ptr<ArrayBuilder> out;
   ARROW_RETURN_NOT_OK(MakeDictionaryBuilder(pool, type, dictionary, &out));
-  return std::move(out);
+  return out;
 }
 
 }  // namespace arrow
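The return std::move(out); to return out; changes rely on the implicit move applied when a local std::unique_ptr converts into the returned Result; the explicit std::move was redundant and is flagged by newer compilers. For context, a hedged sketch (function name invented) of how the touched MakeBuilder overload is typically used:

    #include <arrow/api.h>

    arrow::Result<std::shared_ptr<arrow::Array>> BuildNullsOfType(
        const std::shared_ptr<arrow::DataType>& type) {
      // MakeBuilder picks the concrete builder for a type known only at runtime.
      ARROW_ASSIGN_OR_RAISE(std::unique_ptr<arrow::ArrayBuilder> builder,
                            arrow::MakeBuilder(type));
      ARROW_RETURN_NOT_OK(builder->AppendNulls(3));
      return builder->Finish();
    }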
diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h
index 9f7b0fcdbce07..6089cf04d421f 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -248,7 +248,7 @@ class ARROW_EXPORT VarLengthListLikeBuilder : public ArrayBuilder {
   /// \brief Append dimensions for a single list slot.
   ///
   /// ListViewBuilder overrides this to also append the size.
-  virtual void UnsafeAppendDimensions(int64_t offset, [[maybe_unused]] int64_t size) {
+  virtual void UnsafeAppendDimensions(int64_t offset, int64_t ARROW_ARG_UNUSED(size)) {
     offsets_builder_.UnsafeAppend(static_cast<offset_type>(offset));
   }
 
diff --git a/cpp/src/arrow/array/builder_primitive.h b/cpp/src/arrow/array/builder_primitive.h
index db8d2cbaabb61..de7af1b46bdee 100644
--- a/cpp/src/arrow/array/builder_primitive.h
+++ b/cpp/src/arrow/array/builder_primitive.h
@@ -32,10 +32,10 @@ namespace arrow {
 class ARROW_EXPORT NullBuilder : public ArrayBuilder {
  public:
   explicit NullBuilder(MemoryPool* pool = default_memory_pool(),
-                       [[maybe_unused]] int64_t alignment = kDefaultBufferAlignment)
+                       int64_t ARROW_ARG_UNUSED(alignment) = kDefaultBufferAlignment)
       : ArrayBuilder(pool) {}
 
-  explicit NullBuilder([[maybe_unused]] const std::shared_ptr<DataType>& type,
+  explicit NullBuilder(const std::shared_ptr<DataType>& ARROW_ARG_UNUSED(type),
                        MemoryPool* pool = default_memory_pool(),
                        int64_t alignment = kDefaultBufferAlignment)
       : NullBuilder(pool, alignment) {}
diff --git a/cpp/src/arrow/array/builder_run_end.cc b/cpp/src/arrow/array/builder_run_end.cc
index cff8d72952385..ed384123d8b87 100644
--- a/cpp/src/arrow/array/builder_run_end.cc
+++ b/cpp/src/arrow/array/builder_run_end.cc
@@ -162,8 +162,7 @@ Status RunCompressorBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
 RunEndEncodedBuilder::ValueRunBuilder::ValueRunBuilder(
     MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& value_builder,
     const std::shared_ptr<DataType>& value_type, RunEndEncodedBuilder& ree_builder)
-    : RunCompressorBuilder(pool, std::move(value_builder), std::move(value_type)),
-      ree_builder_(ree_builder) {}
+    : RunCompressorBuilder(pool, value_builder, value_type), ree_builder_(ree_builder) {}
 
 RunEndEncodedBuilder::RunEndEncodedBuilder(
     MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& run_end_builder,
diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc
index 44d58cc0bdebc..87e55246c78fe 100644
--- a/cpp/src/arrow/array/concatenate.cc
+++ b/cpp/src/arrow/array/concatenate.cc
@@ -522,7 +522,8 @@ class ConcatenateImpl {
       }
       out_data += data->length * index_width;
     }
-    return std::move(out);
+    // R build with openSUSE155 requires an explicit shared_ptr construction
+    return std::shared_ptr<Buffer>(std::move(out));
   }
 
   Status Visit(const DictionaryType& d) {
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index ac828a9c35c67..83eeb56c496cf 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -224,6 +224,54 @@ int64_t ArrayData::ComputeLogicalNullCount() const {
   return ArraySpan(*this).ComputeLogicalNullCount();
 }
 
+DeviceAllocationType ArrayData::device_type() const {
+  // we're using 0 as a sentinel value for NOT YET ASSIGNED
+  // there is explicitly no constant DeviceAllocationType to represent
+  // the "UNASSIGNED" case as it is invalid for data to not have an
+  // assigned device type.
If it's still 0 at the end, then we return + // CPU as the allocation device type + int type = 0; + for (const auto& buf : buffers) { + if (!buf) continue; +#ifdef NDEBUG + return buf->device_type(); +#else + if (type == 0) { + type = static_cast(buf->device_type()); + } else { + DCHECK_EQ(type, static_cast(buf->device_type())); + } +#endif + } + + for (const auto& child : child_data) { + if (!child) continue; +#ifdef NDEBUG + return child->device_type(); +#else + if (type == 0) { + type = static_cast(child->device_type()); + } else { + DCHECK_EQ(type, static_cast(child->device_type())); + } +#endif + } + + if (dictionary) { +#ifdef NDEBUG + return dictionary->device_type(); +#else + if (type == 0) { + type = static_cast(dictionary->device_type()); + } else { + DCHECK_EQ(type, static_cast(dictionary->device_type())); + } +#endif + } + + return type == 0 ? DeviceAllocationType::kCPU : static_cast(type); +} + // ---------------------------------------------------------------------- // Methods for ArraySpan diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index beec29789ad1e..e0508fe6980a7 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -101,6 +101,11 @@ struct ARROW_EXPORT ArrayData { int64_t null_count = kUnknownNullCount, int64_t offset = 0) : ArrayData(std::move(type), length, null_count, offset) { this->buffers = std::move(buffers); +#ifndef NDEBUG + // in debug mode, call the `device_type` function to trigger + // the DCHECKs that validate all the buffers are on the same device + ARROW_UNUSED(this->device_type()); +#endif } ArrayData(std::shared_ptr type, int64_t length, @@ -110,6 +115,12 @@ struct ARROW_EXPORT ArrayData { : ArrayData(std::move(type), length, null_count, offset) { this->buffers = std::move(buffers); this->child_data = std::move(child_data); +#ifndef NDEBUG + // in debug mode, call the `device_type` function to trigger + // the DCHECKs that validate all the buffers (including children) + // are on the same device + ARROW_UNUSED(this->device_type()); +#endif } static std::shared_ptr Make(std::shared_ptr type, int64_t length, @@ -358,6 +369,16 @@ struct ARROW_EXPORT ArrayData { /// \see GetNullCount int64_t ComputeLogicalNullCount() const; + /// \brief Return the device_type of the underlying buffers and children + /// + /// If there are no buffers in this ArrayData object, it just returns + /// DeviceAllocationType::kCPU as a default. We also assume that all buffers + /// should be allocated on the same device type and perform DCHECKs to confirm + /// this in debug mode. 
+  ///
+  /// \return DeviceAllocationType
+  DeviceAllocationType device_type() const;
+
   std::shared_ptr<DataType> type;
   int64_t length = 0;
   mutable std::atomic<int64_t> null_count{0};
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index bdba92c9a11fb..b56ea25f9e421 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -125,7 +125,8 @@ class ArrayDataEndianSwapper {
     for (int64_t i = 0; i < length; i++) {
       out_data[i] = bit_util::ByteSwap(in_data[i]);
     }
-    return std::move(out_buffer);
+    // R build with openSUSE155 requires an explicit shared_ptr construction
+    return std::shared_ptr<Buffer>(std::move(out_buffer));
   }
 
   template <typename T>
@@ -548,7 +549,7 @@ class NullArrayFactory {
   }
 
   Status Visit(const StructType& type) {
-    for (int i = 0; i < type_->num_fields(); ++i) {
+    for (int i = 0; i < type.num_fields(); ++i) {
       ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(type, i, length_));
     }
     return Status::OK();
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index 1bd789b7cafe6..0eb22a9d1553d 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -41,7 +41,8 @@ Result<std::shared_ptr<Buffer>> Buffer::CopySlice(const int64_t start,
   ARROW_ASSIGN_OR_RAISE(auto new_buffer, AllocateResizableBuffer(nbytes, pool));
   std::memcpy(new_buffer->mutable_data(), data() + start, static_cast<size_t>(nbytes));
-  return std::move(new_buffer);
+  // R build with openSUSE155 requires an explicit shared_ptr construction
+  return std::shared_ptr<Buffer>(std::move(new_buffer));
 }
 
 Buffer::Buffer() : Buffer(memory_pool::internal::kZeroSizeArea, 0) {}
@@ -185,7 +186,8 @@ Result<std::shared_ptr<Buffer>> AllocateBitmap(int64_t length, MemoryPool* pool)
   if (buf->size() > 0) {
     buf->mutable_data()[buf->size() - 1] = 0;
   }
-  return std::move(buf);
+  // R build with openSUSE155 requires an explicit shared_ptr construction
+  return std::shared_ptr<Buffer>(std::move(buf));
 }
 
 Result<std::shared_ptr<Buffer>> AllocateEmptyBitmap(int64_t length, MemoryPool* pool) {
@@ -197,7 +199,8 @@ Result<std::shared_ptr<Buffer>> AllocateEmptyBitmap(int64_t length, int64_t alig
   ARROW_ASSIGN_OR_RAISE(auto buf,
                         AllocateBuffer(bit_util::BytesForBits(length), alignment, pool));
   memset(buf->mutable_data(), 0, static_cast<size_t>(buf->size()));
-  return std::move(buf);
+  // R build with openSUSE155 requires an explicit shared_ptr construction
+  return std::shared_ptr<Buffer>(std::move(buf));
 }
 
 Status AllocateEmptyBitmap(int64_t length, std::shared_ptr<Buffer>* out) {
@@ -219,7 +222,8 @@ Result<std::shared_ptr<Buffer>> ConcatenateBuffers(
       out_data += buffer->size();
     }
   }
-  return std::move(out);
+  // R build with openSUSE155 requires an explicit shared_ptr construction
+  return std::shared_ptr<Buffer>(std::move(out));
 }
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 8a530b3798d41..afb664c3bc258 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -1059,8 +1059,14 @@ struct SchemaImporter {
       ARROW_ASSIGN_OR_RAISE(
          type_, registered_ext_type->Deserialize(std::move(type_),
                                                  metadata_.extension_serialized));
-      RETURN_NOT_OK(metadata_.metadata->DeleteMany(
-          {metadata_.extension_name_index, metadata_.extension_serialized_index}));
+      // If the serialized payload is present, delete both extension metadata keys;
+      // otherwise only the extension name key exists, so remove just that one.
+      if (metadata_.extension_serialized_index >= 0) {
+        RETURN_NOT_OK(metadata_.metadata->DeleteMany(
+            {metadata_.extension_name_index, metadata_.extension_serialized_index}));
+      } else {
+        RETURN_NOT_OK(metadata_.metadata->Delete(metadata_.extension_name_index));
+      }
     }
   }
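The branch above handles extension fields that carry only ARROW:extension:name and no serialized payload, for which extension_serialized_index is negative. A sketch of the case it fixes (assumes an extension type named "my.ext" with int32 storage has been registered; otherwise ImportType falls back to the storage type and the function below reports Invalid):

    #include <arrow/api.h>
    #include <arrow/c/bridge.h>

    arrow::Status RoundtripNameOnlyExtension() {
      // Only the name key: no ARROW:extension:metadata entry is attached.
      auto metadata =
          arrow::KeyValueMetadata::Make({"ARROW:extension:name"}, {"my.ext"});
      auto f = arrow::field("", arrow::int32(), /*nullable=*/true, std::move(metadata));

      struct ArrowSchema c_schema;
      ARROW_RETURN_NOT_OK(arrow::ExportField(*f, &c_schema));
      // With the fix, the importer deletes only the name key instead of calling
      // DeleteMany with the missing payload's -1 index.
      ARROW_ASSIGN_OR_RAISE(auto imported, arrow::ImportType(&c_schema));
      return imported->id() == arrow::Type::EXTENSION
                 ? arrow::Status::OK()
                 : arrow::Status::Invalid("extension type not resolved");
    }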
@@ -1448,6 +1454,7 @@ namespace {
 
 // The ArrowArray is released on destruction.
 struct ImportedArrayData {
   struct ArrowArray array_;
+  DeviceAllocationType device_type_;
   std::shared_ptr<Device::SyncEvent> device_sync_;
 
   ImportedArrayData() {
@@ -1514,6 +1521,7 @@ struct ArrayImporter {
     recursion_level_ = 0;
     import_ = std::make_shared<ImportedArrayData>();
     c_struct_ = &import_->array_;
+    import_->device_type_ = device_type_;
     ArrowArrayMove(src, c_struct_);
     return DoImport();
   }
@@ -1541,7 +1549,8 @@ struct ArrayImporter {
                              "cannot be imported as RecordBatch");
     }
     return RecordBatch::Make(std::move(schema), data_->length,
-                             std::move(data_->child_data));
+                             std::move(data_->child_data), import_->device_type_,
+                             import_->device_sync_);
   }
 
   Status ImportChild(const ArrayImporter* parent, struct ArrowArray* src) {
@@ -1868,24 +1877,17 @@ struct ArrayImporter {
   template <typename OffsetType>
   Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id,
                                   int64_t byte_width = 1) {
-    if (device_type_ == DeviceAllocationType::kCPU) {
-      auto offsets = data_->GetValues<OffsetType>(offsets_buffer_id);
+    int64_t buffer_size = 0;
+    if (c_struct_->length > 0) {
+      int64_t last_offset_value_offset =
+          (c_struct_->length + c_struct_->offset) * sizeof(OffsetType);
+      OffsetType last_offset_value;
+      RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU(
+          data_->buffers[offsets_buffer_id], last_offset_value_offset, sizeof(OffsetType),
+          reinterpret_cast<uint8_t*>(&last_offset_value)));
       // Compute visible size of buffer
-      int64_t buffer_size =
-          (c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0;
-      return ImportBuffer(buffer_id, buffer_size);
-    }
-
-    // we only need the value of the last offset so let's just copy that
-    // one value from device to host.
-    auto single_value_buf =
-        SliceBuffer(data_->buffers[offsets_buffer_id],
-                    c_struct_->length * sizeof(OffsetType), sizeof(OffsetType));
-    ARROW_ASSIGN_OR_RAISE(
-        auto cpubuf, Buffer::ViewOrCopy(single_value_buf, default_cpu_memory_manager()));
-    auto offsets = cpubuf->data_as<OffsetType>();
-    // Compute visible size of buffer
-    int64_t buffer_size = (c_struct_->length > 0) ? byte_width * offsets[0] : 0;
+      buffer_size = byte_width * last_offset_value;
+    }
     return ImportBuffer(buffer_id, buffer_size);
   }
@@ -2041,6 +2043,23 @@ Status ExportStreamNext(const std::shared_ptr<RecordBatchReader>& src, int64_t i
   }
 }
 
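A minimal roundtrip over the new device stream interface, using CPU-resident data and the default memory mapper (a sketch, not part of the patch; any RecordBatchReader whose device_type() is kCPU works here):

    #include <arrow/api.h>
    #include <arrow/c/bridge.h>

    arrow::Status RoundtripOverDeviceStream(
        std::shared_ptr<arrow::RecordBatchReader> reader) {
      struct ArrowDeviceArrayStream c_stream;
      // device_type on the C struct is taken from reader->device_type().
      ARROW_RETURN_NOT_OK(
          arrow::ExportDeviceRecordBatchReader(std::move(reader), &c_stream));

      // The default mapper resolves CPU devices; non-CPU consumers pass their own.
      ARROW_ASSIGN_OR_RAISE(auto imported,
                            arrow::ImportDeviceRecordBatchReader(&c_stream));

      std::shared_ptr<arrow::RecordBatch> batch;
      do {
        ARROW_RETURN_NOT_OK(imported->ReadNext(&batch));
      } while (batch != nullptr);
      return imported->Close();
    }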
+// the int64_t i input here is unused, but exists simply to allow utilizing the
+// overload of this with the version for ChunkedArrays. If we removed the int64_t
+// from the signature despite it being unused, we wouldn't be able to leverage the
+// overloading in the templated exporters.
+Status ExportStreamNext(const std::shared_ptr<RecordBatchReader>& src, int64_t i,
+                        struct ArrowDeviceArray* out_array) {
+  std::shared_ptr<RecordBatch> batch;
+  RETURN_NOT_OK(src->ReadNext(&batch));
+  if (batch == nullptr) {
+    // End of stream
+    ArrowArrayMarkReleased(&out_array->array);
+    return Status::OK();
+  } else {
+    return ExportDeviceRecordBatch(*batch, batch->GetSyncEvent(), out_array);
+  }
+}
+
 Status ExportStreamNext(const std::shared_ptr<ChunkedArray>& src, int64_t i,
                         struct ArrowArray* out_array) {
   if (i >= src->num_chunks()) {
@@ -2052,8 +2071,27 @@ Status ExportStreamNext(const std::shared_ptr<ChunkedArray>& src, int64_t i,
   }
 }
 
-template <typename T>
+Status ExportStreamNext(const std::shared_ptr<ChunkedArray>& src, int64_t i,
+                        struct ArrowDeviceArray* out_array) {
+  if (i >= src->num_chunks()) {
+    // End of stream
+    ArrowArrayMarkReleased(&out_array->array);
+    return Status::OK();
+  } else {
+    return ExportDeviceArray(*src->chunk(static_cast<int>(i)), nullptr, out_array);
+  }
+}
+
+template <typename T, bool IsDevice>
 class ExportedArrayStream {
+  using StreamTraits =
+      std::conditional_t<IsDevice, ArrayDeviceStreamExportTraits, ArrayStreamExportTraits>;
+  using StreamType = typename StreamTraits::CType;
+  using ArrayTraits =
+      std::conditional_t<IsDevice, ArrayDeviceExportTraits, ArrayExportTraits>;
+  using ArrayType = typename ArrayTraits::CType;
+
  public:
   struct PrivateData {
     explicit PrivateData(std::shared_ptr<T> reader)
@@ -2067,13 +2105,13 @@ class ExportedArrayStream {
     ARROW_DISALLOW_COPY_AND_ASSIGN(PrivateData);
   };
 
-  explicit ExportedArrayStream(struct ArrowArrayStream* stream) : stream_(stream) {}
+  explicit ExportedArrayStream(StreamType* stream) : stream_(stream) {}
 
   Status GetSchema(struct ArrowSchema* out_schema) {
     return ExportStreamSchema(reader(), out_schema);
   }
 
-  Status GetNext(struct ArrowArray* out_array) {
+  Status GetNext(ArrayType* out_array) {
     return ExportStreamNext(reader(), next_batch_num(), out_array);
   }
 
@@ -2083,38 +2121,35 @@ class ExportedArrayStream {
   }
 
   void Release() {
-    if (ArrowArrayStreamIsReleased(stream_)) {
+    if (StreamTraits::IsReleasedFunc(stream_)) {
       return;
     }
+    DCHECK_NE(private_data(), nullptr);
     delete private_data();
 
-    ArrowArrayStreamMarkReleased(stream_);
+    StreamTraits::MarkReleased(stream_);
   }
 
   // C-compatible callbacks
 
-  static int StaticGetSchema(struct ArrowArrayStream* stream,
-                             struct ArrowSchema* out_schema) {
+  static int StaticGetSchema(StreamType* stream, struct ArrowSchema* out_schema) {
     ExportedArrayStream self{stream};
     return self.ToCError(self.GetSchema(out_schema));
   }
 
-  static int StaticGetNext(struct ArrowArrayStream* stream,
-                           struct ArrowArray* out_array) {
+  static int StaticGetNext(StreamType* stream, ArrayType* out_array) {
     ExportedArrayStream self{stream};
     return self.ToCError(self.GetNext(out_array));
   }
 
-  static void StaticRelease(struct ArrowArrayStream* stream) {
-    ExportedArrayStream{stream}.Release();
-  }
+  static void StaticRelease(StreamType* stream) { ExportedArrayStream{stream}.Release(); }
 
-  static const char* StaticGetLastError(struct ArrowArrayStream* stream) {
+  static const char* StaticGetLastError(StreamType* stream) {
     return ExportedArrayStream{stream}.GetLastError();
   }
 
-  static Status Make(std::shared_ptr<T> reader, struct ArrowArrayStream* out) {
+  static Status Make(std::shared_ptr<T> reader, StreamType* out) {
     out->get_schema = ExportedArrayStream::StaticGetSchema;
     out->get_next = ExportedArrayStream::StaticGetNext;
     out->get_last_error = ExportedArrayStream::StaticGetLastError;
@@ -2150,19 +2185,36 @@ class ExportedArrayStream {
   int64_t next_batch_num() { return private_data()->batch_num_++; }
 
-  struct ArrowArrayStream* stream_;
+  StreamType* stream_;
 };
 
 }  // namespace
 
 Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader, struct
ArrowArrayStream* out) { - return ExportedArrayStream::Make(std::move(reader), out); + memset(out, 0, sizeof(struct ArrowArrayStream)); + return ExportedArrayStream::Make(std::move(reader), out); } Status ExportChunkedArray(std::shared_ptr chunked_array, struct ArrowArrayStream* out) { - return ExportedArrayStream::Make(std::move(chunked_array), out); + memset(out, 0, sizeof(struct ArrowArrayStream)); + return ExportedArrayStream::Make(std::move(chunked_array), out); +} + +Status ExportDeviceRecordBatchReader(std::shared_ptr reader, + struct ArrowDeviceArrayStream* out) { + memset(out, 0, sizeof(struct ArrowDeviceArrayStream)); + out->device_type = static_cast(reader->device_type()); + return ExportedArrayStream::Make(std::move(reader), out); +} + +Status ExportDeviceChunkedArray(std::shared_ptr chunked_array, + DeviceAllocationType device_type, + struct ArrowDeviceArrayStream* out) { + memset(out, 0, sizeof(struct ArrowDeviceArrayStream)); + out->device_type = static_cast(device_type); + return ExportedArrayStream::Make(std::move(chunked_array), out); } ////////////////////////////////////////////////////////////////////////// @@ -2170,33 +2222,65 @@ Status ExportChunkedArray(std::shared_ptr chunked_array, namespace { +template class ArrayStreamReader { + protected: + using StreamTraits = + std::conditional_t; + using StreamType = typename StreamTraits::CType; + using ArrayTraits = std::conditional_t; + using ArrayType = typename ArrayTraits::CType; + public: - explicit ArrayStreamReader(struct ArrowArrayStream* stream) { - ArrowArrayStreamMove(stream, &stream_); - DCHECK(!ArrowArrayStreamIsReleased(&stream_)); + explicit ArrayStreamReader(StreamType* stream, + const DeviceMemoryMapper mapper = DefaultDeviceMemoryMapper) + : mapper_{std::move(mapper)} { + StreamTraits::MoveFunc(stream, &stream_); + DCHECK(!StreamTraits::IsReleasedFunc(&stream_)); } ~ArrayStreamReader() { ReleaseStream(); } void ReleaseStream() { - if (!ArrowArrayStreamIsReleased(&stream_)) { - ArrowArrayStreamRelease(&stream_); - } - DCHECK(ArrowArrayStreamIsReleased(&stream_)); + // all our trait release funcs check IsReleased so we don't + // need to repeat it here + StreamTraits::ReleaseFunc(&stream_); + DCHECK(StreamTraits::IsReleasedFunc(&stream_)); } protected: - Status ReadNextArrayInternal(struct ArrowArray* array) { - ArrowArrayMarkReleased(array); + Status ReadNextArrayInternal(ArrayType* array) { + ArrayTraits::MarkReleased(array); Status status = StatusFromCError(stream_.get_next(&stream_, array)); - if (!status.ok() && !ArrowArrayIsReleased(array)) { - ArrowArrayRelease(array); + if (!status.ok()) { + ArrayTraits::ReleaseFunc(array); } return status; } + Result> ImportRecordBatchInternal( + struct ArrowArray* array, std::shared_ptr schema) { + return ImportRecordBatch(array, schema); + } + + Result> ImportRecordBatchInternal( + struct ArrowDeviceArray* array, std::shared_ptr schema) { + return ImportDeviceRecordBatch(array, schema, mapper_); + } + + Result> ImportArrayInternal( + struct ArrowArray* array, std::shared_ptr type) { + return ImportArray(array, type); + } + + Result> ImportArrayInternal( + struct ArrowDeviceArray* array, std::shared_ptr type) { + return ImportDeviceArray(array, type, mapper_); + } + Result> ReadSchema() { struct ArrowSchema c_schema = {}; ARROW_RETURN_NOT_OK( @@ -2214,19 +2298,19 @@ class ArrayStreamReader { } Status CheckNotReleased() { - if (ArrowArrayStreamIsReleased(&stream_)) { + if (StreamTraits::IsReleasedFunc(&stream_)) { return Status::Invalid( "Attempt to read from a 
stream that has already been closed"); - } else { - return Status::OK(); } + + return Status::OK(); } Status StatusFromCError(int errno_like) const { return StatusFromCError(&stream_, errno_like); } - static Status StatusFromCError(struct ArrowArrayStream* stream, int errno_like) { + static Status StatusFromCError(StreamType* stream, int errno_like) { if (ARROW_PREDICT_TRUE(errno_like == 0)) { return Status::OK(); } @@ -2250,70 +2334,102 @@ class ArrayStreamReader { return {code, last_error ? std::string(last_error) : ""}; } + DeviceAllocationType get_device_type() const { + if constexpr (IsDevice) { + return static_cast(stream_.device_type); + } else { + return DeviceAllocationType::kCPU; + } + } + private: - mutable struct ArrowArrayStream stream_; + mutable StreamType stream_; + const DeviceMemoryMapper mapper_; }; -class ArrayStreamBatchReader : public RecordBatchReader, public ArrayStreamReader { +template +class ArrayStreamBatchReader : public RecordBatchReader, + public ArrayStreamReader { + using StreamTraits = + std::conditional_t; + using StreamType = typename StreamTraits::CType; + using ArrayTraits = std::conditional_t; + using ArrayType = typename ArrayTraits::CType; + public: - explicit ArrayStreamBatchReader(struct ArrowArrayStream* stream) - : ArrayStreamReader(stream) {} + explicit ArrayStreamBatchReader( + StreamType* stream, const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) + : ArrayStreamReader(stream, mapper) {} Status Init() { - ARROW_ASSIGN_OR_RAISE(schema_, ReadSchema()); + ARROW_ASSIGN_OR_RAISE(schema_, this->ReadSchema()); return Status::OK(); } std::shared_ptr schema() const override { return schema_; } Status ReadNext(std::shared_ptr* batch) override { - ARROW_RETURN_NOT_OK(CheckNotReleased()); + ARROW_RETURN_NOT_OK(this->CheckNotReleased()); - struct ArrowArray c_array; - ARROW_RETURN_NOT_OK(ReadNextArrayInternal(&c_array)); + ArrayType c_array; + ARROW_RETURN_NOT_OK(this->ReadNextArrayInternal(&c_array)); - if (ArrowArrayIsReleased(&c_array)) { + if (ArrayTraits::IsReleasedFunc(&c_array)) { // End of stream batch->reset(); return Status::OK(); } else { - return ImportRecordBatch(&c_array, schema_).Value(batch); + return this->ImportRecordBatchInternal(&c_array, schema_).Value(batch); } } Status Close() override { - ReleaseStream(); + this->ReleaseStream(); return Status::OK(); } + DeviceAllocationType device_type() const override { return this->get_device_type(); } + private: std::shared_ptr schema_; }; -class ArrayStreamArrayReader : public ArrayStreamReader { +template +class ArrayStreamArrayReader : public ArrayStreamReader { + using StreamTraits = + std::conditional_t; + using StreamType = typename StreamTraits::CType; + using ArrayTraits = std::conditional_t; + using ArrayType = typename ArrayTraits::CType; + public: - explicit ArrayStreamArrayReader(struct ArrowArrayStream* stream) - : ArrayStreamReader(stream) {} + explicit ArrayStreamArrayReader( + StreamType* stream, const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) + : ArrayStreamReader(stream, mapper) {} Status Init() { - ARROW_ASSIGN_OR_RAISE(field_, ReadField()); + ARROW_ASSIGN_OR_RAISE(field_, this->ReadField()); return Status::OK(); } std::shared_ptr data_type() const { return field_->type(); } Status ReadNext(std::shared_ptr* array) { - ARROW_RETURN_NOT_OK(CheckNotReleased()); + ARROW_RETURN_NOT_OK(this->CheckNotReleased()); - struct ArrowArray c_array; - ARROW_RETURN_NOT_OK(ReadNextArrayInternal(&c_array)); + ArrayType c_array; + 
ARROW_RETURN_NOT_OK(this->ReadNextArrayInternal(&c_array)); - if (ArrowArrayIsReleased(&c_array)) { + if (ArrayTraits::IsReleasedFunc(&c_array)) { // End of stream array->reset(); return Status::OK(); } else { - return ImportArray(&c_array, field_->type()).Value(array); + return this->ImportArrayInternal(&c_array, field_->type()).Value(array); } } @@ -2321,30 +2437,35 @@ class ArrayStreamArrayReader : public ArrayStreamReader { std::shared_ptr field_; }; -} // namespace - -Result> ImportRecordBatchReader( - struct ArrowArrayStream* stream) { - if (ArrowArrayStreamIsReleased(stream)) { - return Status::Invalid("Cannot import released ArrowArrayStream"); +template > +Result> ImportReader( + typename StreamTraits::CType* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) { + if (StreamTraits::IsReleasedFunc(stream)) { + return Status::Invalid("Cannot import released Arrow Stream"); } - auto reader = std::make_shared(stream); + auto reader = std::make_shared>(stream, mapper); ARROW_RETURN_NOT_OK(reader->Init()); return reader; } -Result> ImportChunkedArray( - struct ArrowArrayStream* stream) { - if (ArrowArrayStreamIsReleased(stream)) { - return Status::Invalid("Cannot import released ArrowArrayStream"); +template > +Result> ImportChunked( + typename StreamTraits::CType* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) { + if (StreamTraits::IsReleasedFunc(stream)) { + return Status::Invalid("Cannot import released Arrow Stream"); } - auto reader = std::make_shared(stream); + auto reader = std::make_shared>(stream, mapper); ARROW_RETURN_NOT_OK(reader->Init()); - std::shared_ptr data_type = reader->data_type(); - + auto data_type = reader->data_type(); ArrayVector chunks; std::shared_ptr chunk; while (true) { @@ -2360,4 +2481,26 @@ Result> ImportChunkedArray( return ChunkedArray::Make(std::move(chunks), std::move(data_type)); } +} // namespace + +Result> ImportRecordBatchReader( + struct ArrowArrayStream* stream) { + return ImportReader(stream); +} + +Result> ImportDeviceRecordBatchReader( + struct ArrowDeviceArrayStream* stream, const DeviceMemoryMapper& mapper) { + return ImportReader(stream, mapper); +} + +Result> ImportChunkedArray( + struct ArrowArrayStream* stream) { + return ImportChunked(stream); +} + +Result> ImportDeviceChunkedArray( + struct ArrowDeviceArrayStream* stream, const DeviceMemoryMapper& mapper) { + return ImportChunked(stream, mapper); +} + } // namespace arrow diff --git a/cpp/src/arrow/c/bridge.h b/cpp/src/arrow/c/bridge.h index 74a302be4c27d..45367e4f93062 100644 --- a/cpp/src/arrow/c/bridge.h +++ b/cpp/src/arrow/c/bridge.h @@ -321,6 +321,31 @@ ARROW_EXPORT Status ExportChunkedArray(std::shared_ptr chunked_array, struct ArrowArrayStream* out); +/// \brief Export C++ RecordBatchReader using the C device stream interface +/// +/// The resulting ArrowDeviceArrayStream struct keeps the record batch reader +/// alive until its release callback is called by the consumer. The device +/// type is determined by calling device_type() on the RecordBatchReader. +/// +/// \param[in] reader RecordBatchReader object to export +/// \param[out] out C struct to export the stream to +ARROW_EXPORT +Status ExportDeviceRecordBatchReader(std::shared_ptr reader, + struct ArrowDeviceArrayStream* out); + +/// \brief Export C++ ChunkedArray using the C device data interface format. +/// +/// The resulting ArrowDeviceArrayStream keeps the chunked array data and buffers +/// alive until its release callback is called by the consumer. 
+/// +/// \param[in] chunked_array ChunkedArray object to export +/// \param[in] device_type the device type the data is located on +/// \param[out] out C struct to export the stream to +ARROW_EXPORT +Status ExportDeviceChunkedArray(std::shared_ptr chunked_array, + DeviceAllocationType device_type, + struct ArrowDeviceArrayStream* out); + /// \brief Import C++ RecordBatchReader from the C stream interface. /// /// The ArrowArrayStream struct has its contents moved to a private object @@ -343,6 +368,42 @@ Result> ImportRecordBatchReader( ARROW_EXPORT Result> ImportChunkedArray(struct ArrowArrayStream* stream); +/// \brief Import C++ RecordBatchReader from the C device stream interface +/// +/// The ArrowDeviceArrayStream struct has its contents moved to a private object +/// held alive by the resulting record batch reader. +/// +/// \note If there was a required sync event, sync events are accessible by individual +/// buffers of columns. We are not yet bubbling the sync events from the buffers up to +/// the `GetSyncEvent` method of an imported RecordBatch. This will be added in a future +/// update. +/// +/// \param[in,out] stream C device stream interface struct +/// \param[in] mapper mapping from device type and ID to memory manager +/// \return Imported RecordBatchReader object +ARROW_EXPORT +Result> ImportDeviceRecordBatchReader( + struct ArrowDeviceArrayStream* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); + +/// \brief Import C++ ChunkedArray from the C device stream interface +/// +/// The ArrowDeviceArrayStream struct has its contents moved to a private object, +/// is consumed in its entirety, and released before returning all chunks as a +/// ChunkedArray. +/// +/// \note Any chunks that require synchronization for their device memory will have +/// the SyncEvent objects available by checking the individual buffers of each chunk. +/// These SyncEvents should be checked before accessing the data in those buffers. 
+/// +/// \param[in,out] stream C device stream interface struct +/// \param[in] mapper mapping from device type and ID to memory manager +/// \return Imported ChunkedArray object +ARROW_EXPORT +Result> ImportDeviceChunkedArray( + struct ArrowDeviceArrayStream* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); + /// @} } // namespace arrow diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index d64fe67accde0..09bb524adbdf0 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -53,11 +53,15 @@ namespace arrow { +using internal::ArrayDeviceExportTraits; +using internal::ArrayDeviceStreamExportTraits; using internal::ArrayExportGuard; using internal::ArrayExportTraits; using internal::ArrayStreamExportGuard; using internal::ArrayStreamExportTraits; using internal::checked_cast; +using internal::DeviceArrayExportGuard; +using internal::DeviceArrayStreamExportGuard; using internal::SchemaExportGuard; using internal::SchemaExportTraits; using internal::Zip; @@ -1358,7 +1362,7 @@ class MyMemoryManager : public CPUMemoryManager { if (buf.size() > 0) { memcpy(dest->mutable_data(), buf.data(), static_cast(buf.size())); } - return std::move(dest); + return dest; } }; @@ -4098,6 +4102,23 @@ TEST_F(TestArrayRoundtrip, RegisteredExtension) { TestWithArrayFactory(NestedFactory(ExampleDictExtension)); } +TEST_F(TestArrayRoundtrip, RegisteredExtensionNoMetadata) { + auto ext_type = std::make_shared(); + ExtensionTypeGuard guard(ext_type); + + auto ext_metadata = + KeyValueMetadata::Make({"ARROW:extension:name"}, {ext_type->extension_name()}); + auto ext_field = field("", ext_type->storage_type(), true, std::move(ext_metadata)); + + struct ArrowSchema c_schema {}; + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK(ExportField(*ext_field, &c_schema)); + + ASSERT_OK_AND_ASSIGN(auto ext_type_roundtrip, ImportType(&c_schema)); + ASSERT_EQ(ext_type_roundtrip->id(), Type::EXTENSION); + AssertTypeEqual(ext_type_roundtrip, ext_type); +} + TEST_F(TestArrayRoundtrip, UnregisteredExtension) { auto StorageExtractor = [](ArrayFactory factory) { return [factory]() -> Result> { @@ -4746,4 +4767,516 @@ TEST_F(TestArrayStreamRoundtrip, ChunkedArrayRoundtripEmpty) { }); } +//////////////////////////////////////////////////////////////////////////// +// Array device stream export tests + +class TestArrayDeviceStreamExport : public BaseArrayStreamTest { + public: + void AssertStreamSchema(struct ArrowDeviceArrayStream* c_stream, + const Schema& expected) { + struct ArrowSchema c_schema; + ASSERT_EQ(0, c_stream->get_schema(c_stream, &c_schema)); + + SchemaExportGuard schema_guard(&c_schema); + ASSERT_FALSE(ArrowSchemaIsReleased(&c_schema)); + ASSERT_OK_AND_ASSIGN(auto schema, ImportSchema(&c_schema)); + AssertSchemaEqual(expected, *schema, /*check_metadata=*/true); + } + + void AssertStreamEnd(struct ArrowDeviceArrayStream* c_stream) { + struct ArrowDeviceArray c_array; + ASSERT_EQ(0, c_stream->get_next(c_stream, &c_array)); + + DeviceArrayExportGuard guard(&c_array); + ASSERT_TRUE(ArrowDeviceArrayIsReleased(&c_array)); + } + + void AssertStreamNext(struct ArrowDeviceArrayStream* c_stream, + const RecordBatch& expected) { + struct ArrowDeviceArray c_array; + ASSERT_EQ(0, c_stream->get_next(c_stream, &c_array)); + + DeviceArrayExportGuard guard(&c_array); + ASSERT_FALSE(ArrowDeviceArrayIsReleased(&c_array)); + + ASSERT_OK_AND_ASSIGN(auto batch, + ImportDeviceRecordBatch(&c_array, expected.schema(), + TestDeviceArrayRoundtrip::DeviceMapper)); + 
AssertBatchesEqual(expected, *batch); + } + + void AssertStreamNext(struct ArrowDeviceArrayStream* c_stream, const Array& expected) { + struct ArrowDeviceArray c_array; + ASSERT_EQ(0, c_stream->get_next(c_stream, &c_array)); + + DeviceArrayExportGuard guard(&c_array); + ASSERT_FALSE(ArrowDeviceArrayIsReleased(&c_array)); + + ASSERT_OK_AND_ASSIGN(auto array, + ImportDeviceArray(&c_array, expected.type(), + TestDeviceArrayRoundtrip::DeviceMapper)); + AssertArraysEqual(expected, *array); + } + + static Result> ToDeviceData( + const std::shared_ptr& mm, const ArrayData& data) { + arrow::BufferVector buffers; + for (const auto& buf : data.buffers) { + if (buf) { + ARROW_ASSIGN_OR_RAISE(auto dest, mm->CopyBuffer(buf, mm)); + buffers.push_back(dest); + } else { + buffers.push_back(nullptr); + } + } + + arrow::ArrayDataVector children; + for (const auto& child : data.child_data) { + ARROW_ASSIGN_OR_RAISE(auto dest, ToDeviceData(mm, *child)); + children.push_back(dest); + } + + return ArrayData::Make(data.type, data.length, buffers, children, data.null_count, + data.offset); + } + + static Result> ToDevice(const std::shared_ptr& mm, + const ArrayData& data) { + ARROW_ASSIGN_OR_RAISE(auto result, ToDeviceData(mm, data)); + return MakeArray(result); + } +}; + +TEST_F(TestArrayDeviceStreamExport, Empty) { + auto schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(schema, {}); + ASSERT_OK_AND_ASSIGN( + auto reader, + RecordBatchReader::Make(batches, schema, + static_cast(kMyDeviceType))); + + struct ArrowDeviceArrayStream c_stream; + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + AssertStreamSchema(&c_stream, *schema); + AssertStreamEnd(&c_stream); + AssertStreamEnd(&c_stream); +} + +TEST_F(TestArrayDeviceStreamExport, Simple) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN(auto reader, + RecordBatchReader::Make(batches, schema, device->device_type())); + + struct ArrowDeviceArrayStream c_stream; + + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + AssertStreamSchema(&c_stream, *schema); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + AssertStreamNext(&c_stream, *batches[0]); + AssertStreamNext(&c_stream, *batches[1]); + AssertStreamEnd(&c_stream); + AssertStreamEnd(&c_stream); +} + +TEST_F(TestArrayDeviceStreamExport, ArrayLifetime) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto schema = arrow::schema({field("ints", int32())}); + 
auto batches = MakeBatches(schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN(auto reader, + RecordBatchReader::Make(batches, schema, device->device_type())); + + struct ArrowDeviceArrayStream c_stream; + struct ArrowSchema c_schema; + struct ArrowDeviceArray c_array0, c_array1; + + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + { + DeviceArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array0)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array1)); + AssertStreamEnd(&c_stream); + } + + DeviceArrayExportGuard guard0(&c_array0), guard1(&c_array1); + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_schema, ImportSchema(&c_schema)); + AssertSchemaEqual(*schema, *got_schema, /*check_metadata=*/true); + } + + ASSERT_EQ(kMyDeviceType, c_array0.device_type); + ASSERT_EQ(kMyDeviceType, c_array1.device_type); + + ASSERT_GT(pool_->bytes_allocated(), orig_allocated_); + ASSERT_OK_AND_ASSIGN( + auto batch, + ImportDeviceRecordBatch(&c_array1, schema, TestDeviceArrayRoundtrip::DeviceMapper)); + AssertBatchesEqual(*batches[1], *batch); + ASSERT_EQ(device->device_type(), batch->device_type()); + ASSERT_OK_AND_ASSIGN( + batch, + ImportDeviceRecordBatch(&c_array0, schema, TestDeviceArrayRoundtrip::DeviceMapper)); + AssertBatchesEqual(*batches[0], *batch); + ASSERT_EQ(device->device_type(), batch->device_type()); +} + +TEST_F(TestArrayDeviceStreamExport, Errors) { + auto reader = + std::make_shared(Status::Invalid("some example error")); + + struct ArrowDeviceArrayStream c_stream; + + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + struct ArrowSchema c_schema; + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + ASSERT_FALSE(ArrowSchemaIsReleased(&c_schema)); + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto schema, ImportSchema(&c_schema)); + AssertSchemaEqual(schema, arrow::schema({}), /*check_metadata=*/true); + } + + struct ArrowDeviceArray c_array; + ASSERT_EQ(EINVAL, c_stream.get_next(&c_stream, &c_array)); +} + +TEST_F(TestArrayDeviceStreamExport, ChunkedArrayExportEmpty) { + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({}, int32())); + + struct ArrowDeviceArrayStream c_stream; + struct ArrowSchema c_schema; + + ASSERT_OK(ExportDeviceChunkedArray( + chunked_array, static_cast(kMyDeviceType), &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + { + DeviceArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + AssertStreamEnd(&c_stream); + } + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_type, ImportType(&c_schema)); + AssertTypeEqual(*chunked_array->type(), *got_type); + } +} + +TEST_F(TestArrayDeviceStreamExport, ChunkedArrayExport) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); 
+ + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({arr1, arr2})); + + struct ArrowDeviceArrayStream c_stream; + struct ArrowSchema c_schema; + struct ArrowDeviceArray c_array0, c_array1; + + ASSERT_OK(ExportDeviceChunkedArray(chunked_array, device->device_type(), &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + { + DeviceArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array0)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array1)); + AssertStreamEnd(&c_stream); + } + + DeviceArrayExportGuard guard0(&c_array0), guard1(&c_array1); + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_type, ImportType(&c_schema)); + AssertTypeEqual(*chunked_array->type(), *got_type); + } + + ASSERT_EQ(kMyDeviceType, c_array0.device_type); + ASSERT_EQ(kMyDeviceType, c_array1.device_type); + + ASSERT_GT(pool_->bytes_allocated(), orig_allocated_); + ASSERT_OK_AND_ASSIGN(auto array, + ImportDeviceArray(&c_array0, chunked_array->type(), + TestDeviceArrayRoundtrip::DeviceMapper)); + ASSERT_EQ(device->device_type(), array->device_type()); + AssertArraysEqual(*chunked_array->chunk(0), *array); + ASSERT_OK_AND_ASSIGN(array, ImportDeviceArray(&c_array1, chunked_array->type(), + TestDeviceArrayRoundtrip::DeviceMapper)); + ASSERT_EQ(device->device_type(), array->device_type()); + AssertArraysEqual(*chunked_array->chunk(1), *array); +} + +//////////////////////////////////////////////////////////////////////////// +// Array device stream roundtrip tests + +class TestArrayDeviceStreamRoundtrip : public BaseArrayStreamTest { + public: + static Result> ToDeviceData( + const std::shared_ptr& mm, const ArrayData& data) { + arrow::BufferVector buffers; + for (const auto& buf : data.buffers) { + if (buf) { + ARROW_ASSIGN_OR_RAISE(auto dest, mm->CopyBuffer(buf, mm)); + buffers.push_back(dest); + } else { + buffers.push_back(nullptr); + } + } + + arrow::ArrayDataVector children; + for (const auto& child : data.child_data) { + ARROW_ASSIGN_OR_RAISE(auto dest, ToDeviceData(mm, *child)); + children.push_back(dest); + } + + return ArrayData::Make(data.type, data.length, buffers, children, data.null_count, + data.offset); + } + + static Result> ToDevice(const std::shared_ptr& mm, + const ArrayData& data) { + ARROW_ASSIGN_OR_RAISE(auto result, ToDeviceData(mm, data)); + return MakeArray(result); + } + + void Roundtrip(std::shared_ptr* reader, + struct ArrowDeviceArrayStream* c_stream) { + ASSERT_OK(ExportDeviceRecordBatchReader(*reader, c_stream)); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(c_stream)); + + ASSERT_OK_AND_ASSIGN( + auto got_reader, + ImportDeviceRecordBatchReader(c_stream, TestDeviceArrayRoundtrip::DeviceMapper)); + *reader = std::move(got_reader); + } + + void Roundtrip( + std::shared_ptr reader, + std::function&)> check_func) { + ArrowDeviceArrayStream c_stream; + + // NOTE: ReleaseCallback<> is not immediately usable with ArrowDeviceArayStream + // because get_next and get_schema need the original private_data. 
+ std::weak_ptr weak_reader(reader); + ASSERT_EQ(weak_reader.use_count(), 1); // Expiration check will fail otherwise + + ASSERT_OK(ExportDeviceRecordBatchReader(std::move(reader), &c_stream)); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + + { + ASSERT_OK_AND_ASSIGN(auto new_reader, + ImportDeviceRecordBatchReader( + &c_stream, TestDeviceArrayRoundtrip::DeviceMapper)); + // stream was moved + ASSERT_TRUE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_FALSE(weak_reader.expired()); + + check_func(new_reader); + } + // Stream was released when `new_reader` was destroyed + ASSERT_TRUE(weak_reader.expired()); + } + + void Roundtrip(std::shared_ptr src, + std::function&)> check_func) { + ArrowDeviceArrayStream c_stream; + + // One original copy to compare the result, one copy held by the stream + std::weak_ptr weak_src(src); + int64_t initial_use_count = weak_src.use_count(); + + ASSERT_OK(ExportDeviceChunkedArray( + std::move(src), static_cast(kMyDeviceType), &c_stream)); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + + { + ASSERT_OK_AND_ASSIGN( + auto dst, + ImportDeviceChunkedArray(&c_stream, TestDeviceArrayRoundtrip::DeviceMapper)); + // Stream was moved, consumed, and released + ASSERT_TRUE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + + // Stream was released by ImportDeviceChunkedArray but original copy remains + ASSERT_EQ(weak_src.use_count(), initial_use_count - 1); + + check_func(dst); + } + } + + void AssertReaderNext(const std::shared_ptr& reader, + const RecordBatch& expected) { + ASSERT_OK_AND_ASSIGN(auto batch, reader->Next()); + ASSERT_NE(batch, nullptr); + ASSERT_EQ(static_cast(kMyDeviceType), batch->device_type()); + AssertBatchesEqual(expected, *batch); + } + + void AssertReaderEnd(const std::shared_ptr& reader) { + ASSERT_OK_AND_ASSIGN(auto batch, reader->Next()); + ASSERT_EQ(batch, nullptr); + } + + void AssertReaderClosed(const std::shared_ptr& reader) { + ASSERT_THAT(reader->Next(), + Raises(StatusCode::Invalid, ::testing::HasSubstr("already been closed"))); + } + + void AssertReaderClose(const std::shared_ptr& reader) { + ASSERT_OK(reader->Close()); + AssertReaderClosed(reader); + } +}; + +TEST_F(TestArrayDeviceStreamRoundtrip, Simple) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto orig_schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(orig_schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN( + auto reader, RecordBatchReader::Make(batches, orig_schema, device->device_type())); + + Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) { + AssertSchemaEqual(*orig_schema, *reader->schema(), /*check_metadata=*/true); + AssertReaderNext(reader, *batches[0]); + AssertReaderNext(reader, *batches[1]); + AssertReaderEnd(reader); + AssertReaderEnd(reader); + AssertReaderClose(reader); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, CloseEarly) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + 
ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto orig_schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(orig_schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN( + auto reader, RecordBatchReader::Make(batches, orig_schema, device->device_type())); + + Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) { + AssertReaderNext(reader, *batches[0]); + AssertReaderClose(reader); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, Errors) { + auto reader = std::make_shared( + Status::Invalid("roundtrip error example")); + + Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) { + EXPECT_THAT(reader->Next(), Raises(StatusCode::Invalid, + ::testing::HasSubstr("roundtrip error example"))); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, SchemaError) { + struct ArrowDeviceArrayStream stream = {}; + stream.get_last_error = [](struct ArrowDeviceArrayStream* stream) { + return "Expected error"; + }; + stream.get_schema = [](struct ArrowDeviceArrayStream* stream, + struct ArrowSchema* schema) { return EIO; }; + stream.get_next = [](struct ArrowDeviceArrayStream* stream, + struct ArrowDeviceArray* array) { return EINVAL; }; + stream.release = [](struct ArrowDeviceArrayStream* stream) { + *static_cast(stream->private_data) = true; + std::memset(stream, 0, sizeof(*stream)); + }; + bool released = false; + stream.private_data = &released; + + EXPECT_RAISES_WITH_MESSAGE_THAT(IOError, ::testing::HasSubstr("Expected error"), + ImportDeviceRecordBatchReader(&stream)); + ASSERT_TRUE(released); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, ChunkedArrayRoundtrip) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + + ASSERT_OK_AND_ASSIGN(auto src, ChunkedArray::Make({arr1, arr2})); + + Roundtrip(src, [&](const std::shared_ptr& dst) { + AssertTypeEqual(*dst->type(), *src->type()); + AssertChunkedEqual(*dst, *src); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, ChunkedArrayRoundtripEmpty) { + ASSERT_OK_AND_ASSIGN(auto src, ChunkedArray::Make({}, int32())); + + Roundtrip(src, [&](const std::shared_ptr& dst) { + AssertTypeEqual(*dst->type(), *src->type()); + AssertChunkedEqual(*dst, *src); + }); +} + } // namespace arrow diff --git a/cpp/src/arrow/c/helpers.h b/cpp/src/arrow/c/helpers.h index a24f272feac81..6e4df17f43ebf 100644 --- a/cpp/src/arrow/c/helpers.h +++ b/cpp/src/arrow/c/helpers.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -70,9 +71,17 @@ inline int ArrowArrayIsReleased(const struct ArrowArray* array) { return array->release == NULL; } +inline int ArrowDeviceArrayIsReleased(const struct ArrowDeviceArray* array) { + return ArrowArrayIsReleased(&array->array); +} + /// Mark the C array released (for use in release callbacks) inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; } +inline void ArrowDeviceArrayMarkReleased(struct ArrowDeviceArray* array) { + ArrowArrayMarkReleased(&array->array); +} + /// Move the C array from `src` to `dest` /// /// Note `dest` must *not* point to a valid array already, otherwise there @@ -84,6 +93,14 @@ inline 
void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) { ArrowArrayMarkReleased(src); } +inline void ArrowDeviceArrayMove(struct ArrowDeviceArray* src, + struct ArrowDeviceArray* dest) { + assert(dest != src); + assert(!ArrowDeviceArrayIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowDeviceArray)); + ArrowDeviceArrayMarkReleased(src); +} + /// Release the C array, if necessary, by calling its release callback inline void ArrowArrayRelease(struct ArrowArray* array) { if (!ArrowArrayIsReleased(array)) { @@ -93,16 +110,32 @@ inline void ArrowArrayRelease(struct ArrowArray* array) { } } +inline void ArrowDeviceArrayRelease(struct ArrowDeviceArray* array) { + if (!ArrowDeviceArrayIsReleased(array)) { + array->array.release(&array->array); + ARROW_C_ASSERT(ArrowDeviceArrayIsReleased(array), + "ArrowDeviceArrayRelease did not cleanup release callback"); + } +} + /// Query whether the C array stream is released inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) { return stream->release == NULL; } +inline int ArrowDeviceArrayStreamIsReleased(const struct ArrowDeviceArrayStream* stream) { + return stream->release == NULL; +} + /// Mark the C array stream released (for use in release callbacks) inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) { stream->release = NULL; } +inline void ArrowDeviceArrayStreamMarkReleased(struct ArrowDeviceArrayStream* stream) { + stream->release = NULL; +} + /// Move the C array stream from `src` to `dest` /// /// Note `dest` must *not* point to a valid stream already, otherwise there @@ -115,6 +148,14 @@ inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, ArrowArrayStreamMarkReleased(src); } +inline void ArrowDeviceArrayStreamMove(struct ArrowDeviceArrayStream* src, + struct ArrowDeviceArrayStream* dest) { + assert(dest != src); + assert(!ArrowDeviceArrayStreamIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowDeviceArrayStream)); + ArrowDeviceArrayStreamMarkReleased(src); +} + /// Release the C array stream, if necessary, by calling its release callback inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { if (!ArrowArrayStreamIsReleased(stream)) { @@ -124,6 +165,14 @@ inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { } } +inline void ArrowDeviceArrayStreamRelease(struct ArrowDeviceArrayStream* stream) { + if (!ArrowDeviceArrayStreamIsReleased(stream)) { + stream->release(stream); + ARROW_C_ASSERT(ArrowDeviceArrayStreamIsReleased(stream), + "ArrowDeviceArrayStreamRelease did not cleanup release callback"); + } +} + #ifdef __cplusplus } #endif diff --git a/cpp/src/arrow/c/util_internal.h b/cpp/src/arrow/c/util_internal.h index 6a33be9b0da8e..dc0e25710e987 100644 --- a/cpp/src/arrow/c/util_internal.h +++ b/cpp/src/arrow/c/util_internal.h @@ -32,12 +32,32 @@ struct ArrayExportTraits { typedef struct ArrowArray CType; static constexpr auto IsReleasedFunc = &ArrowArrayIsReleased; static constexpr auto ReleaseFunc = &ArrowArrayRelease; + static constexpr auto MoveFunc = &ArrowArrayMove; + static constexpr auto MarkReleased = &ArrowArrayMarkReleased; +}; + +struct ArrayDeviceExportTraits { + typedef struct ArrowDeviceArray CType; + static constexpr auto IsReleasedFunc = &ArrowDeviceArrayIsReleased; + static constexpr auto ReleaseFunc = &ArrowDeviceArrayRelease; + static constexpr auto MoveFunc = &ArrowDeviceArrayMove; + static constexpr auto MarkReleased = &ArrowDeviceArrayMarkReleased; }; struct ArrayStreamExportTraits { typedef struct 
ArrowArrayStream CType; static constexpr auto IsReleasedFunc = &ArrowArrayStreamIsReleased; static constexpr auto ReleaseFunc = &ArrowArrayStreamRelease; + static constexpr auto MoveFunc = &ArrowArrayStreamMove; + static constexpr auto MarkReleased = &ArrowArrayStreamMarkReleased; +}; + +struct ArrayDeviceStreamExportTraits { + typedef struct ArrowDeviceArrayStream CType; + static constexpr auto IsReleasedFunc = &ArrowDeviceArrayStreamIsReleased; + static constexpr auto ReleaseFunc = &ArrowDeviceArrayStreamRelease; + static constexpr auto MoveFunc = &ArrowDeviceArrayStreamMove; + static constexpr auto MarkReleased = &ArrowDeviceArrayStreamMarkReleased; }; // A RAII-style object to release a C Array / Schema struct at block scope exit. @@ -79,7 +99,9 @@ class ExportGuard { using SchemaExportGuard = ExportGuard; using ArrayExportGuard = ExportGuard; +using DeviceArrayExportGuard = ExportGuard; using ArrayStreamExportGuard = ExportGuard; +using DeviceArrayStreamExportGuard = ExportGuard; } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc index 29bccb52658f8..55eec53ced1c7 100644 --- a/cpp/src/arrow/chunk_resolver.cc +++ b/cpp/src/arrow/chunk_resolver.cc @@ -19,14 +19,14 @@ #include #include +#include #include #include #include "arrow/array.h" #include "arrow/record_batch.h" -namespace arrow { -namespace internal { +namespace arrow::internal { namespace { template @@ -54,6 +54,51 @@ inline std::vector MakeChunksOffsets(const std::vector& chunks) { offsets[chunks.size()] = offset; return offsets; } + +/// \pre all the pre-conditions of ChunkResolver::ResolveMany() +/// \pre num_offsets - 1 <= std::numeric_limits::max() +template +void ResolveManyInline(size_t num_offsets, const int64_t* signed_offsets, + int64_t n_indices, const IndexType* logical_index_vec, + IndexType* out_chunk_index_vec, IndexType chunk_hint, + IndexType* out_index_in_chunk_vec) { + auto* offsets = reinterpret_cast(signed_offsets); + const auto num_chunks = static_cast(num_offsets - 1); + // chunk_hint in [0, num_offsets) per the precondition. + for (int64_t i = 0; i < n_indices; i++) { + const auto index = static_cast(logical_index_vec[i]); + if (index >= offsets[chunk_hint] && + (chunk_hint == num_chunks || index < offsets[chunk_hint + 1])) { + out_chunk_index_vec[i] = chunk_hint; // hint is correct! + continue; + } + // lo < hi is guaranteed by `num_offsets = chunks.size() + 1` + auto chunk_index = + ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets); + chunk_hint = static_cast(chunk_index); + out_chunk_index_vec[i] = chunk_hint; + } + if (out_index_in_chunk_vec != NULLPTR) { + for (int64_t i = 0; i < n_indices; i++) { + auto logical_index = logical_index_vec[i]; + auto chunk_index = out_chunk_index_vec[i]; + // chunk_index is in [0, chunks.size()] no matter what the + // value of logical_index is, so it's always safe to dereference + // offset_ as it contains chunks.size()+1 values. + out_index_in_chunk_vec[i] = + logical_index - static_cast(offsets[chunk_index]); +#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) + // Make it more likely that Valgrind/ASAN can catch an invalid memory + // access by poisoning out_index_in_chunk_vec[i] when the logical + // index is out-of-bounds. 
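+      // For example, with IndexType = uint32_t the poison value is 0xFFFFFFFF,
+      // an index-in-chunk that no real chunk can contain, so a subsequent
+      // access through it is far more likely to be flagged than a small,
+      // plausible-looking bogus index would be.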
+ if (chunk_index == num_chunks) { + out_index_in_chunk_vec[i] = std::numeric_limits::max(); + } +#endif + } + } +} + } // namespace ChunkResolver::ChunkResolver(const ArrayVector& chunks) noexcept @@ -84,5 +129,32 @@ ChunkResolver& ChunkResolver::operator=(const ChunkResolver& other) noexcept { return *this; } -} // namespace internal -} // namespace arrow +void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint8_t* logical_index_vec, + uint8_t* out_chunk_index_vec, uint8_t chunk_hint, + uint8_t* out_index_in_chunk_vec) const { + ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, + out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); +} + +void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint32_t* logical_index_vec, + uint32_t* out_chunk_index_vec, uint32_t chunk_hint, + uint32_t* out_index_in_chunk_vec) const { + ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, + out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); +} + +void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint16_t* logical_index_vec, + uint16_t* out_chunk_index_vec, uint16_t chunk_hint, + uint16_t* out_index_in_chunk_vec) const { + ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, + out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); +} + +void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_index_vec, + uint64_t* out_chunk_index_vec, uint64_t chunk_hint, + uint64_t* out_index_in_chunk_vec) const { + ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, + out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); +} + +} // namespace arrow::internal diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h index c5dad1a17b18e..a2a3d5a864243 100644 --- a/cpp/src/arrow/chunk_resolver.h +++ b/cpp/src/arrow/chunk_resolver.h @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include "arrow/type_fwd.h" @@ -27,6 +29,8 @@ namespace arrow::internal { +struct ChunkResolver; + struct ChunkLocation { /// \brief Index of the chunk in the array of chunks /// @@ -36,8 +40,17 @@ struct ChunkLocation { /// \brief Index of the value in the chunk /// - /// The value is undefined if chunk_index >= chunks.size() + /// The value is UNDEFINED if chunk_index >= chunks.size() int64_t index_in_chunk = 0; + + ChunkLocation() = default; + + ChunkLocation(int64_t chunk_index, int64_t index_in_chunk) + : chunk_index(chunk_index), index_in_chunk(index_in_chunk) {} + + bool operator==(ChunkLocation other) const { + return chunk_index == other.chunk_index && index_in_chunk == other.index_in_chunk; + } }; /// \brief An utility that incrementally resolves logical indices into @@ -60,12 +73,35 @@ struct ARROW_EXPORT ChunkResolver { explicit ChunkResolver(const std::vector& chunks) noexcept; explicit ChunkResolver(const RecordBatchVector& batches) noexcept; + /// \brief Construct a ChunkResolver from a vector of chunks.size() + 1 offsets. + /// + /// The first offset must be 0 and the last offset must be the logical length of the + /// chunked array. Each offset before the last represents the starting logical index of + /// the corresponding chunk. 
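+  ///
+  /// For example, the offsets {0, 2, 3, 10} describe a chunked array of
+  /// logical length 10 made of three chunks of lengths 2, 1, and 7.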
+ explicit ChunkResolver(std::vector offsets) noexcept + : offsets_(std::move(offsets)), cached_chunk_(0) { +#ifndef NDEBUG + assert(offsets_.size() >= 1); + assert(offsets_[0] == 0); + for (size_t i = 1; i < offsets_.size(); i++) { + assert(offsets_[i] >= offsets_[i - 1]); + } +#endif + } + ChunkResolver(ChunkResolver&& other) noexcept; ChunkResolver& operator=(ChunkResolver&& other) noexcept; ChunkResolver(const ChunkResolver& other) noexcept; ChunkResolver& operator=(const ChunkResolver& other) noexcept; + int64_t logical_array_length() const { return offsets_.back(); } + int64_t num_chunks() const { return static_cast(offsets_.size()) - 1; } + + int64_t chunk_length(int64_t chunk_index) const { + return offsets_[chunk_index + 1] - offsets_[chunk_index]; + } + /// \brief Resolve a logical index to a ChunkLocation. /// /// The returned ChunkLocation contains the chunk index and the within-chunk index @@ -81,7 +117,7 @@ struct ARROW_EXPORT ChunkResolver { const auto cached_chunk = cached_chunk_.load(std::memory_order_relaxed); const auto chunk_index = ResolveChunkIndex(index, cached_chunk); - return {chunk_index, index - offsets_[chunk_index]}; + return ChunkLocation{chunk_index, index - offsets_[chunk_index]}; } /// \brief Resolve a logical index to a ChunkLocation. @@ -97,12 +133,70 @@ struct ARROW_EXPORT ChunkResolver { /// \return ChunkLocation with a valid chunk_index if index is within /// bounds, or with chunk_index == chunks.size() if logical index is /// `>= chunked_array.length()`. - inline ChunkLocation ResolveWithChunkIndexHint(int64_t index, - ChunkLocation hint) const { + inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) const { assert(hint.chunk_index < static_cast(offsets_.size())); const auto chunk_index = ResolveChunkIndex(index, hint.chunk_index); - return {chunk_index, index - offsets_[chunk_index]}; + return ChunkLocation{chunk_index, index - offsets_[chunk_index]}; + } + + /// \brief Resolve `n_indices` logical indices to chunk indices. + /// + /// \pre 0 <= logical_index_vec[i] < logical_array_length() + /// (for well-defined and valid chunk index results) + /// \pre out_chunk_index_vec has space for `n_indices` + /// \pre chunk_hint in [0, chunks.size()] + /// \post out_chunk_index_vec[i] in [0, chunks.size()] for i in [0, n) + /// \post if logical_index_vec[i] >= chunked_array.length(), then + /// out_chunk_index_vec[i] == chunks.size() + /// and out_index_in_chunk_vec[i] is UNDEFINED (can be out-of-bounds) + /// \post if logical_index_vec[i] < 0, then both out_chunk_index_vec[i] and + /// out_index_in_chunk_vec[i] are UNDEFINED + /// + /// \param n_indices The number of logical indices to resolve + /// \param logical_index_vec The logical indices to resolve + /// \param out_chunk_index_vec The output array where the chunk indices will be written + /// \param chunk_hint 0 or the last chunk_index produced by ResolveMany + /// \param out_index_in_chunk_vec If not NULLPTR, the output array where the + /// within-chunk indices will be written + /// \return false iff chunks.size() > std::numeric_limits::max() + template + [[nodiscard]] bool ResolveMany(int64_t n_indices, const IndexType* logical_index_vec, + IndexType* out_chunk_index_vec, IndexType chunk_hint = 0, + IndexType* out_index_in_chunk_vec = NULLPTR) const { + if constexpr (sizeof(IndexType) < sizeof(uint64_t)) { + // The max value returned by Bisect is `offsets.size() - 1` (= chunks.size()). 
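+      // For instance, a ChunkedArray holding 255 values split across 300
+      // chunks (so at least 45 of them empty) has chunk indices up to 300,
+      // which do not fit in uint8_t; in that case we bail out by returning
+      // false below.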
+      constexpr uint64_t kMaxIndexTypeValue = std::numeric_limits<IndexType>::max();
+      // A ChunkedArray with enough empty chunks can make the index of a chunk
+      // exceed the logical index and thus the maximum value of IndexType.
+      const bool chunk_index_fits_on_type =
+          static_cast<uint64_t>(offsets_.size() - 1) <= kMaxIndexTypeValue;
+      if (ARROW_PREDICT_FALSE(!chunk_index_fits_on_type)) {
+        return false;
+      }
+      // Since an index-in-chunk cannot possibly exceed the logical index being
+      // queried, we don't have to worry about these values not fitting in IndexType.
+    }
+    if constexpr (std::is_signed_v<IndexType>) {
+      // We interpret signed integers as unsigned and avoid having to generate double
+      // the amount of binary code to handle each integer width.
+      //
+      // Negative logical indices can become large values when cast to unsigned, and
+      // they are gracefully handled by ResolveManyImpl, but both the chunk index
+      // and the index in chunk values will be undefined in these cases. This
+      // happens because int8_t(-1) == uint8_t(255) and 255 could be a valid
+      // logical index in the chunked array.
+      using U = std::make_unsigned_t<IndexType>;
+      ResolveManyImpl(n_indices, reinterpret_cast<const U*>(logical_index_vec),
+                      reinterpret_cast<U*>(out_chunk_index_vec),
+                      static_cast<U>(chunk_hint),
+                      reinterpret_cast<U*>(out_index_in_chunk_vec));
+    } else {
+      static_assert(std::is_unsigned_v<IndexType>);
+      ResolveManyImpl(n_indices, logical_index_vec, out_chunk_index_vec, chunk_hint,
+                      out_index_in_chunk_vec);
+    }
+    return true;
+  }
 
  private:
@@ -130,17 +224,33 @@ struct ARROW_EXPORT ChunkResolver {
     return chunk_index;
   }
 
+  /// \pre all the pre-conditions of ChunkResolver::ResolveMany()
+  /// \pre num_offsets - 1 <= std::numeric_limits<IndexType>::max()
+  void ResolveManyImpl(int64_t, const uint8_t*, uint8_t*, uint8_t, uint8_t*) const;
+  void ResolveManyImpl(int64_t, const uint16_t*, uint16_t*, uint16_t, uint16_t*) const;
+  void ResolveManyImpl(int64_t, const uint32_t*, uint32_t*, uint32_t, uint32_t*) const;
+  void ResolveManyImpl(int64_t, const uint64_t*, uint64_t*, uint64_t, uint64_t*) const;
+
+ public:
   /// \brief Find the index of the chunk that contains the logical index.
   ///
   /// Any non-negative index is accepted. When `hi=num_offsets`, the largest
   /// possible return value is `num_offsets-1` which is equal to
-  /// `chunks.size()`. The is returned when the logical index is out-of-bounds.
+  /// `chunks.size()`, which is returned when the logical index is greater than
+  /// or equal to the logical length of the chunked array.
   ///
-  /// \pre index >= 0
+  /// \pre index >= 0 (otherwise, when index is negative, hi-1 is returned)
   /// \pre lo < hi
   /// \pre lo >= 0 && hi <= offsets_.size()
   static inline int64_t Bisect(int64_t index, const int64_t* offsets, int64_t lo,
                                int64_t hi) {
+    return Bisect(static_cast<uint64_t>(index),
+                  reinterpret_cast<const uint64_t*>(offsets), static_cast<uint64_t>(lo),
+                  static_cast<uint64_t>(hi));
+  }
+
+  static inline int64_t Bisect(uint64_t index, const uint64_t* offsets, uint64_t lo,
+                               uint64_t hi) {
     // Similar to std::upper_bound(), but slightly different as our offsets
     // array always starts with 0.
     auto n = hi - lo;
     // First iteration does not need to check for n > 1
     // (lo < hi is guaranteed by the precondition).
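+    // For example, with offsets {0, 2, 3, 10} (chunk lengths 2, 1, 7),
+    // Bisect(5, offsets, /*lo=*/0, /*hi=*/4) returns 2: offsets[2] == 3 <= 5
+    // and 5 < offsets[3] == 10, so logical index 5 falls in the third chunk.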
assert(n > 1 && "lo < hi is a precondition of Bisect"); do { - const int64_t m = n >> 1; - const int64_t mid = lo + m; + const uint64_t m = n >> 1; + const uint64_t mid = lo + m; if (index >= offsets[mid]) { lo = mid; n -= m; diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc index 6ca52ab46ca68..e9cc283b53cd5 100644 --- a/cpp/src/arrow/chunked_array_test.cc +++ b/cpp/src/arrow/chunked_array_test.cc @@ -23,6 +23,7 @@ #include #include +#include "arrow/chunk_resolver.h" #include "arrow/scalar.h" #include "arrow/status.h" #include "arrow/testing/builder.h" @@ -34,6 +35,9 @@ namespace arrow { +using internal::ChunkLocation; +using internal::ChunkResolver; + class TestChunkedArray : public ::testing::Test { protected: virtual void Construct() { @@ -310,4 +314,200 @@ TEST_F(TestChunkedArray, GetScalar) { ASSERT_RAISES(IndexError, carr.GetScalar(7)); } +// ChunkResolver tests + +using IndexTypes = ::testing::Types; + +TEST(TestChunkResolver, Resolve) { + ChunkResolver empty(std::vector({0})); // [] + // ChunkLocation::index_in_chunk is undefined when chunk_index==chunks.size(), + // so only chunk_index is compared in these cases. + ASSERT_EQ(empty.Resolve(0).chunk_index, 0); + ASSERT_EQ(empty.Resolve(0).chunk_index, 0); + + ChunkResolver one(std::vector({0, 1})); // [[0]] + ASSERT_EQ(one.Resolve(1).chunk_index, 1); + ASSERT_EQ(one.Resolve(0), (ChunkLocation(0, 0))); + ASSERT_EQ(one.Resolve(1).chunk_index, 1); + + ChunkResolver one_and_empty(std::vector({0, 1, 1, 1})); // [[0], [], []] + ASSERT_EQ(one_and_empty.Resolve(3).chunk_index, 3); + ASSERT_EQ(one_and_empty.Resolve(2).chunk_index, 3); + ASSERT_EQ(one_and_empty.Resolve(1).chunk_index, 3); + ASSERT_EQ(one_and_empty.Resolve(0), (ChunkLocation(0, 0))); + ASSERT_EQ(one_and_empty.Resolve(1).chunk_index, 3); + ASSERT_EQ(one_and_empty.Resolve(2).chunk_index, 3); + ASSERT_EQ(one_and_empty.Resolve(3).chunk_index, 3); + + ChunkResolver one_one_one(std::vector({0, 1, 2, 3})); // [[0], [1], [2]] + ASSERT_EQ(one_one_one.Resolve(3).chunk_index, 3); + ASSERT_EQ(one_one_one.Resolve(2), (ChunkLocation(2, 0))); + ASSERT_EQ(one_one_one.Resolve(1), (ChunkLocation(1, 0))); + ASSERT_EQ(one_one_one.Resolve(0), (ChunkLocation(0, 0))); + ASSERT_EQ(one_one_one.Resolve(1), (ChunkLocation(1, 0))); + ASSERT_EQ(one_one_one.Resolve(2), (ChunkLocation(2, 0))); + ASSERT_EQ(one_one_one.Resolve(3).chunk_index, 3); + + ChunkResolver resolver(std::vector({0, 2, 3, 10})); // [[0, 1], [2], [3..9]] + ASSERT_EQ(resolver.Resolve(10).chunk_index, 3); + ASSERT_EQ(resolver.Resolve(9), (ChunkLocation(2, 6))); + ASSERT_EQ(resolver.Resolve(8), (ChunkLocation(2, 5))); + ASSERT_EQ(resolver.Resolve(4), (ChunkLocation(2, 1))); + ASSERT_EQ(resolver.Resolve(3), (ChunkLocation(2, 0))); + ASSERT_EQ(resolver.Resolve(2), (ChunkLocation(1, 0))); + ASSERT_EQ(resolver.Resolve(1), (ChunkLocation(0, 1))); + ASSERT_EQ(resolver.Resolve(0), (ChunkLocation(0, 0))); + ASSERT_EQ(resolver.Resolve(1), (ChunkLocation(0, 1))); + ASSERT_EQ(resolver.Resolve(2), (ChunkLocation(1, 0))); + ASSERT_EQ(resolver.Resolve(3), (ChunkLocation(2, 0))); + ASSERT_EQ(resolver.Resolve(4), (ChunkLocation(2, 1))); + ASSERT_EQ(resolver.Resolve(8), (ChunkLocation(2, 5))); + ASSERT_EQ(resolver.Resolve(9), (ChunkLocation(2, 6))); + ASSERT_EQ(resolver.Resolve(10).chunk_index, 3); +} + +template +class TestChunkResolverMany : public ::testing::Test { + public: + using IndexType = T; + + Result> ResolveMany( + const ChunkResolver& resolver, const std::vector& logical_index_vec) { + const size_t n = 
logical_index_vec.size(); + std::vector chunk_index_vec; + chunk_index_vec.resize(n); + std::vector index_in_chunk_vec; + index_in_chunk_vec.resize(n); + bool valid = resolver.ResolveMany( + static_cast(n), logical_index_vec.data(), chunk_index_vec.data(), 0, + index_in_chunk_vec.data()); + if (ARROW_PREDICT_FALSE(!valid)) { + return Status::Invalid("index type doesn't fit possible chunk indexes"); + } + std::vector locations; + locations.reserve(n); + for (size_t i = 0; i < n; i++) { + auto chunk_index = static_cast(chunk_index_vec[i]); + auto index_in_chunk = static_cast(index_in_chunk_vec[i]); + locations.emplace_back(chunk_index, index_in_chunk); + } + return locations; + } + + void CheckResolveMany(const ChunkResolver& resolver, + const std::vector& logical_index_vec) { + ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec)); + EXPECT_EQ(logical_index_vec.size(), locations.size()); + for (size_t i = 0; i < logical_index_vec.size(); i++) { + IndexType logical_index = logical_index_vec[i]; + const auto expected = resolver.Resolve(logical_index); + ASSERT_LE(expected.chunk_index, resolver.num_chunks()); + if (expected.chunk_index == resolver.num_chunks()) { + // index_in_chunk is undefined in this case + ASSERT_EQ(locations[i].chunk_index, expected.chunk_index); + } else { + ASSERT_EQ(locations[i], expected); + } + } + } + + void TestBasics() { + std::vector logical_index_vec; + + ChunkResolver empty(std::vector({0})); // [] + logical_index_vec = {0, 0}; + CheckResolveMany(empty, logical_index_vec); + + ChunkResolver one(std::vector({0, 1})); // [[0]] + logical_index_vec = {1, 0, 1}; + CheckResolveMany(one, logical_index_vec); + + ChunkResolver one_and_empty(std::vector({0, 1, 1, 1})); // [[0], [], []] + logical_index_vec = {3, 2, 1, 0, 1, 2, 3}; + CheckResolveMany(one_and_empty, logical_index_vec); + + ChunkResolver one_one_one(std::vector({0, 1, 2, 3})); // [[0], [1], [2]] + logical_index_vec = {3, 2, 1, 0, 1, 2, 3}; + CheckResolveMany(one_one_one, logical_index_vec); + + ChunkResolver resolver(std::vector({0, 2, 3, 10})); // [[0, 1], [2], [3..9]] + logical_index_vec = {10, 9, 8, 4, 3, 2, 1, 0, 1, 2, 3, 4, 8, 9, 10}; + CheckResolveMany(resolver, logical_index_vec); + } + + void TestOutOfBounds() { + ChunkResolver resolver(std::vector({0, 2, 3, 10})); // [[0, 1], [2], [3..9]] + + std::vector logical_index_vec = {10, 11, 12, 13, 14, 13, 11, 10}; + ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec)); + EXPECT_EQ(logical_index_vec.size(), locations.size()); + for (size_t i = 0; i < logical_index_vec.size(); i++) { + ASSERT_EQ(locations[i].chunk_index, resolver.num_chunks()); + } + + if constexpr (std::is_signed_v) { + std::vector logical_index_vec = {-1, -2, -3, -4, INT8_MIN}; + + ChunkResolver resolver(std::vector({0, 2, 128})); // [[0, 1], [2..127]] + ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec)); + EXPECT_EQ(logical_index_vec.size(), locations.size()); + for (size_t i = 0; i < logical_index_vec.size(); i++) { + // All the negative indices are greater than resolver.logical_array_length()-1 + // when cast to uint8_t. 
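+      // Concretely, int8_t(-1) reinterpreted as uint8_t is 255, and 255 >= 128
+      // (the logical length of this resolver), so every entry resolves to the
+      // out-of-bounds sentinel num_chunks().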
+ ASSERT_EQ(locations[i].chunk_index, resolver.num_chunks()); + } + + if constexpr (sizeof(IndexType) == 1) { + ChunkResolver resolver(std::vector( + {0, 2, 128, 129, 256})); // [[0, 1], [2..127], [128], [129, 255]] + ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec)); + EXPECT_EQ(logical_index_vec.size(), locations.size()); + for (size_t i = 0; i < logical_index_vec.size(); i++) { + if constexpr (sizeof(IndexType) == 1) { + // All the negative 8-bit indices are SMALLER than + // resolver.logical_array_length()=256 when cast to 8-bit unsigned integers. + // So the resolved locations might look valid, but they should not be trusted. + ASSERT_LT(locations[i].chunk_index, resolver.num_chunks()); + } else { + // All the negative indices are greater than resolver.logical_array_length() + // when cast to 16/32/64-bit unsigned integers. + ASSERT_EQ(locations[i].chunk_index, resolver.num_chunks()); + } + } + } + } + } + + void TestOverflow() { + const int64_t kMaxIndex = std::is_signed_v ? 127 : 255; + std::vector logical_index_vec = {0, 1, 2, + static_cast(kMaxIndex)}; + + // Overflows are rare because to make them possible, we need more chunks + // than logical elements in the ChunkedArray. That requires at least one + // empty chunk. + std::vector offsets; + for (int64_t i = 0; i <= kMaxIndex; i++) { + offsets.push_back(i); + } + ChunkResolver resolver{offsets}; + ASSERT_OK(ResolveMany(resolver, logical_index_vec)); + + offsets.push_back(kMaxIndex); // adding an empty chunk + ChunkResolver resolver_with_empty{offsets}; + if (sizeof(IndexType) == 1) { + ASSERT_NOT_OK(ResolveMany(resolver_with_empty, logical_index_vec)); + } else { + ASSERT_OK(ResolveMany(resolver_with_empty, logical_index_vec)); + } + } +}; + +TYPED_TEST_SUITE(TestChunkResolverMany, IndexTypes); + +TYPED_TEST(TestChunkResolverMany, Basics) { this->TestBasics(); } +TYPED_TEST(TestChunkResolverMany, OutOfBounds) { this->TestOutOfBounds(); } +TYPED_TEST(TestChunkResolverMany, Overflow) { this->TestOverflow(); } + } // namespace arrow diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index badcf4f2f26ac..0a8018cd580cf 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -90,7 +90,9 @@ add_arrow_test(internals_test light_array_test.cc registry_test.cc key_hash_test.cc - row/compare_test.cc) + row/compare_test.cc + row/grouper_test.cc + util_internal_test.cc) add_arrow_compute_test(expression_test SOURCES expression_test.cc) diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index d47ee42ebf239..f0d5c0fcc3d72 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -153,6 +153,8 @@ static auto kRankOptionsType = GetFunctionOptionsType( DataMember("tiebreaker", &RankOptions::tiebreaker)); static auto kPairwiseOptionsType = GetFunctionOptionsType( DataMember("periods", &PairwiseOptions::periods)); +static auto kListFlattenOptionsType = GetFunctionOptionsType( + DataMember("recursive", &ListFlattenOptions::recursive)); } // namespace } // namespace internal @@ -224,6 +226,10 @@ PairwiseOptions::PairwiseOptions(int64_t periods) : FunctionOptions(internal::kPairwiseOptionsType), periods(periods) {} constexpr char PairwiseOptions::kTypeName[]; +ListFlattenOptions::ListFlattenOptions(bool recursive) + : FunctionOptions(internal::kListFlattenOptionsType), recursive(recursive) {} +constexpr char ListFlattenOptions::kTypeName[]; + namespace internal { void 
RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType)); @@ -237,6 +243,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kPairwiseOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kListFlattenOptionsType)); } } // namespace internal diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 919572f16ee69..e5bcc37329661 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -245,6 +245,18 @@ class ARROW_EXPORT PairwiseOptions : public FunctionOptions { int64_t periods = 1; }; +/// \brief Options for list_flatten function +class ARROW_EXPORT ListFlattenOptions : public FunctionOptions { + public: + explicit ListFlattenOptions(bool recursive = false); + static constexpr char const kTypeName[] = "ListFlattenOptions"; + static ListFlattenOptions Defaults() { return ListFlattenOptions(); } + + /// \brief If true, the list is flattened recursively until a non-list + /// array is formed. + bool recursive = false; +}; + /// @} /// \brief Filter with a boolean selection filter diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index f2e4578383122..05c4936482b0b 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -923,7 +923,7 @@ class ScalarExecutor : public KernelExecutorImpl { DCHECK(output.is_array_data()); // Emit a result for each chunk - RETURN_NOT_OK(EmitResult(std::move(output.array_data()), listener)); + RETURN_NOT_OK(EmitResult(output.array_data(), listener)); } return Status::OK(); } @@ -1107,7 +1107,7 @@ class VectorExecutor : public KernelExecutorImpl { RETURN_NOT_OK(PropagateNulls(kernel_ctx_, span, out.array_data().get())); } RETURN_NOT_OK(kernel_->exec(kernel_ctx_, span, &out)); - return EmitResult(std::move(out.array_data()), listener); + return EmitResult(out.array_data(), listener); } Status ExecChunked(const ExecBatch& batch, ExecListener* listener) { @@ -1116,10 +1116,10 @@ class VectorExecutor : public KernelExecutorImpl { ARROW_ASSIGN_OR_RAISE(out.value, PrepareOutput(batch.length)); RETURN_NOT_OK(kernel_->exec_chunked(kernel_ctx_, batch, &out)); if (out.is_array()) { - return EmitResult(std::move(out.array()), listener); + return EmitResult(out.array(), listener); } else { DCHECK(out.is_chunked_array()); - return EmitResult(std::move(out.chunked_array()), listener); + return EmitResult(out.chunked_array(), listener); } } diff --git a/cpp/src/arrow/compute/expression.cc b/cpp/src/arrow/compute/expression.cc index 532869b3453a7..b1d914ce873cc 100644 --- a/cpp/src/arrow/compute/expression.cc +++ b/cpp/src/arrow/compute/expression.cc @@ -1645,7 +1645,7 @@ Expression and_(const std::vector& operands) { Expression folded = operands.front(); for (auto it = operands.begin() + 1; it != operands.end(); ++it) { - folded = and_(std::move(folded), std::move(*it)); + folded = and_(std::move(folded), *it); } return folded; } @@ -1659,7 +1659,7 @@ Expression or_(const std::vector& operands) { Expression folded = operands.front(); for (auto it = operands.begin() + 1; it != operands.end(); ++it) { - folded = or_(std::move(folded), std::move(*it)); + folded = or_(std::move(folded), *it); } return folded; } diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h index 
653273ef0fac2..9d8928466baa5 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -684,12 +684,13 @@ const FunctionOptionsType* GetFunctionOptionsType(const Properties&... propertie auto options = std::make_unique(); RETURN_NOT_OK( FromStructScalarImpl(options.get(), scalar, properties_).status_); - return std::move(options); + // R build with openSUSE155 requires an explicit unique_ptr construction + return std::unique_ptr(std::move(options)); } std::unique_ptr Copy(const FunctionOptions& options) const override { auto out = std::make_unique(); CopyImpl(out.get(), checked_cast(options), properties_); - return std::move(out); + return out; } private: diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc index fd554ba3d83c5..5c87ef2cd0561 100644 --- a/cpp/src/arrow/compute/kernel.cc +++ b/cpp/src/arrow/compute/kernel.cc @@ -75,7 +75,7 @@ Result> ScalarAggregateKernel::MergeAll( for (auto& state : states) { RETURN_NOT_OK(kernel->merge(ctx, std::move(*state), out.get())); } - return std::move(out); + return out; } // ---------------------------------------------------------------------- @@ -361,7 +361,8 @@ size_t InputType::Hash() const { case InputType::EXACT_TYPE: hash_combine(result, type_->Hash()); break; - default: + case InputType::ANY_TYPE: + case InputType::USE_TYPE_MATCHER: break; } return result; @@ -378,10 +379,8 @@ std::string InputType::ToString() const { break; case InputType::USE_TYPE_MATCHER: { ss << type_matcher_->ToString(); - } break; - default: - DCHECK(false); break; + } } return ss.str(); } @@ -400,9 +399,8 @@ bool InputType::Equals(const InputType& other) const { return type_->Equals(*other.type_); case InputType::USE_TYPE_MATCHER: return type_matcher_->Equals(*other.type_matcher_); - default: - return false; } + return false; } bool InputType::Matches(const DataType& type) const { @@ -411,21 +409,23 @@ bool InputType::Matches(const DataType& type) const { return type_->Equals(type); case InputType::USE_TYPE_MATCHER: return type_matcher_->Matches(type); - default: - // ANY_TYPE + case InputType::ANY_TYPE: return true; } + return false; } bool InputType::Matches(const Datum& value) const { switch (value.kind()) { + case Datum::NONE: + case Datum::RECORD_BATCH: + case Datum::TABLE: + DCHECK(false) << "Matches expects ARRAY, CHUNKED_ARRAY or SCALAR"; + return false; case Datum::ARRAY: case Datum::CHUNKED_ARRAY: case Datum::SCALAR: break; - default: - DCHECK(false); - return false; } return Matches(*value.type()); } @@ -445,11 +445,13 @@ const TypeMatcher& InputType::type_matcher() const { Result OutputType::Resolve(KernelContext* ctx, const std::vector& types) const { - if (kind_ == OutputType::FIXED) { - return type_.get(); - } else { - return resolver_(ctx, types); + switch (kind_) { + case OutputType::FIXED: + return type_; + case OutputType::COMPUTED: + break; } + return resolver_(ctx, types); } const std::shared_ptr& OutputType::type() const { @@ -463,11 +465,13 @@ const OutputType::Resolver& OutputType::resolver() const { } std::string OutputType::ToString() const { - if (kind_ == OutputType::FIXED) { - return type_->ToString(); - } else { - return "computed"; + switch (kind_) { + case OutputType::FIXED: + return type_->ToString(); + case OutputType::COMPUTED: + break; } + return "computed"; } // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc b/cpp/src/arrow/compute/kernels/codegen_internal.cc index 
00a833742f957..0fd9cae7a8d71 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.cc +++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc @@ -23,6 +23,7 @@ #include #include +#include "arrow/compute/api_vector.h" #include "arrow/type_fwd.h" namespace arrow { @@ -56,9 +57,23 @@ Result LastType(KernelContext*, const std::vector& types return types.back(); } -Result ListValuesType(KernelContext*, const std::vector& args) { - const auto& list_type = checked_cast(*args[0].type); - return list_type.value_type().get(); +Result ListValuesType(KernelContext* ctx, + const std::vector& args) { + auto list_type = checked_cast(args[0].type); + auto value_type = list_type->value_type().get(); + + auto recursive = + ctx->state() ? OptionsWrapper::Get(ctx).recursive : false; + if (!recursive) { + return value_type; + } + + for (auto value_kind = value_type->id(); + is_list(value_kind) || is_list_view(value_kind); value_kind = value_type->id()) { + list_type = checked_cast(list_type->value_type().get()); + value_type = list_type->value_type().get(); + } + return value_type; } void EnsureDictionaryDecoded(std::vector* types) { diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index 097ee1de45b6a..9e46a21887f8c 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -423,7 +423,8 @@ static void VisitTwoArrayValuesInline(const ArraySpan& arr0, const ArraySpan& ar Result FirstType(KernelContext*, const std::vector& types); Result LastType(KernelContext*, const std::vector& types); -Result ListValuesType(KernelContext*, const std::vector& types); +Result ListValuesType(KernelContext* ctx, + const std::vector& types); // ---------------------------------------------------------------------- // Helpers for iterating over common DataType instances for adding kernels to diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index 5052d8dd66694..54cd695421a93 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -83,7 +83,8 @@ Result> HashAggregateInit(KernelContext* ctx, const KernelInitArgs& args) { auto impl = std::make_unique(); RETURN_NOT_OK(impl->Init(ctx->exec_context(), args)); - return std::move(impl); + // R build with openSUSE155 requires an explicit unique_ptr construction + return std::unique_ptr(std::move(impl)); } Status HashAggregateResize(KernelContext* ctx, int64_t num_groups) { @@ -813,7 +814,7 @@ struct GroupedMeanImpl (*null_count)++; bit_util::SetBitTo((*null_bitmap)->mutable_data(), i, false); } - return std::move(values); + return values; } std::shared_ptr out_type() const override { @@ -1114,7 +1115,8 @@ Result> VarStdInit(KernelContext* ctx, auto impl = std::make_unique>(); impl->result_type_ = result_type; RETURN_NOT_OK(impl->Init(ctx->exec_context(), args)); - return std::move(impl); + // R build with openSUSE155 requires an explicit unique_ptr construction + return std::unique_ptr(std::move(impl)); } template @@ -1685,7 +1687,7 @@ Result> MinMaxInit(KernelContext* ctx, const KernelInitArgs& args) { ARROW_ASSIGN_OR_RAISE(auto impl, HashAggregateInit>(ctx, args)); static_cast*>(impl.get())->type_ = args.inputs[0].GetSharedPtr(); - return std::move(impl); + return impl; } template @@ -2188,7 +2190,7 @@ Result> FirstLastInit(KernelContext* ctx, ARROW_ASSIGN_OR_RAISE(auto impl, HashAggregateInit>(ctx, args)); 
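+  // `impl` is a local std::unique_ptr<KernelState>, so the plain `return impl;`
+  // below already binds as an rvalue when converted to the Result return type;
+  // wrapping it in std::move() would be redundant.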
static_cast*>(impl.get())->type_ = args.inputs[0].GetSharedPtr(); - return std::move(impl); + return impl; } template @@ -2597,7 +2599,7 @@ Result> GroupedDistinctInit(KernelContext* ctx, instance->out_type_ = args.inputs[0].GetSharedPtr(); ARROW_ASSIGN_OR_RAISE(instance->grouper_, Grouper::Make(args.inputs, ctx->exec_context())); - return std::move(impl); + return impl; } // ---------------------------------------------------------------------- @@ -2839,7 +2841,7 @@ Result> GroupedOneInit(KernelContext* ctx, ARROW_ASSIGN_OR_RAISE(auto impl, HashAggregateInit>(ctx, args)); auto instance = static_cast*>(impl.get()); instance->out_type_ = args.inputs[0].GetSharedPtr(); - return std::move(impl); + return impl; } struct GroupedOneFactory { @@ -3237,7 +3239,7 @@ Result> GroupedListInit(KernelContext* ctx, ARROW_ASSIGN_OR_RAISE(auto impl, HashAggregateInit>(ctx, args)); auto instance = static_cast*>(impl.get()); instance->out_type_ = args.inputs[0].GetSharedPtr(); - return std::move(impl); + return impl; } struct GroupedListFactory { diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index efd25a8a20c80..eb243de4a765e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -503,7 +503,7 @@ Result ResolveDecimalBinaryOperationOutput( ToResult(getter(left_type.precision(), left_type.scale(), right_type.precision(), right_type.scale()))); ARROW_ASSIGN_OR_RAISE(auto type, DecimalType::Make(left_type.id(), precision, scale)); - return std::move(type); + return type; } Result ResolveDecimalAdditionOrSubtractionOutput( @@ -566,7 +566,7 @@ Result ResolveTemporalOutput(KernelContext*, } auto type = duration(right_type.unit()); - return std::move(type); + return type; } template diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index 3a8352a9b870f..dc3fe29a3dfae 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -340,10 +340,15 @@ BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou if (input.offset == output->offset) { output->buffers[0] = input.GetBuffer(0); } else { - ARROW_ASSIGN_OR_RAISE( - output->buffers[0], - arrow::internal::CopyBitmap(ctx->memory_pool(), input.buffers[0].data, - input.offset, input.length)); + // When the offsets are different (e.g., due to slice operation), we need to check if + // the null bitmap buffer is not null before copying it. The null bitmap buffer can be + // null if the input array value does not contain any null value. 
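+      // For example, ["foo", "bar", "baz", "quu"] as fixed_size_binary(3)
+      // sliced with Slice(1, 3) reaches this branch with a non-zero offset and
+      // no validity bitmap, since the array contains no nulls (see
+      // FixedSizeBinaryToBinaryOrStringWithSlice in scalar_cast_test.cc).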
+ if (input.buffers[0].data != NULLPTR) { + ARROW_ASSIGN_OR_RAISE( + output->buffers[0], + arrow::internal::CopyBitmap(ctx->memory_pool(), input.buffers[0].data, + input.offset, input.length)); + } } // This buffer is preallocated diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index af62b4da2caa5..a6d7f6097b59b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2196,6 +2196,33 @@ TEST(Cast, BinaryOrStringToFixedSizeBinary) { } } +TEST(Cast, FixedSizeBinaryToBinaryOrString) { + for (auto out_type : {utf8(), large_utf8(), binary(), large_binary()}) { + auto valid_input = ArrayFromJSON(fixed_size_binary(3), R"(["foo", null, "bar", + "baz", "quu"])"); + + CheckCast(valid_input, ArrayFromJSON(out_type, R"(["foo", null, "bar", "baz", + "quu"])")); + + auto empty_input = ArrayFromJSON(fixed_size_binary(3), "[]"); + CheckCast(empty_input, ArrayFromJSON(out_type, "[]")); + } +} + +TEST(Cast, FixedSizeBinaryToBinaryOrStringWithSlice) { + for (auto out_type : {utf8(), large_utf8(), binary(), large_binary()}) { + auto valid_input = ArrayFromJSON(fixed_size_binary(3), R"(["foo", null, "bar", + "baz", "quu"])"); + auto sliced = valid_input->Slice(1, 3); + CheckCast(sliced, ArrayFromJSON(out_type, R"([null, "bar", "baz"])")); + + auto valid_input_without_null = ArrayFromJSON(fixed_size_binary(3), R"(["foo", "bar", + "baz", "quu"])"); + auto sliced_without_null = valid_input_without_null->Slice(1, 3); + CheckCast(sliced_without_null, ArrayFromJSON(out_type, R"(["bar", "baz", "quu"])")); + } +} + TEST(Cast, IntToString) { for (auto string_type : {utf8(), large_utf8()}) { CheckCast(ArrayFromJSON(int8(), "[0, 1, 127, -128, null]"), diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index ee181c053c053..6368ef525ff9c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -1309,9 +1309,10 @@ void AddFixedWidthIfElseKernel(const std::shared_ptr& scalar_fun } void AddNestedIfElseKernels(const std::shared_ptr& scalar_function) { - for (const auto type_id : {Type::LIST, Type::LARGE_LIST, Type::LIST_VIEW, - Type::LARGE_LIST_VIEW, Type::FIXED_SIZE_LIST, Type::STRUCT, - Type::DENSE_UNION, Type::SPARSE_UNION, Type::DICTIONARY}) { + for (const auto type_id : + {Type::LIST, Type::LARGE_LIST, Type::LIST_VIEW, Type::LARGE_LIST_VIEW, + Type::FIXED_SIZE_LIST, Type::MAP, Type::STRUCT, Type::DENSE_UNION, + Type::SPARSE_UNION, Type::DICTIONARY}) { ScalarKernel kernel({boolean(), InputType(type_id), InputType(type_id)}, LastType, NestedIfElseExec::Exec); kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE; @@ -1482,39 +1483,27 @@ Status ExecScalarCaseWhen(KernelContext* ctx, const ExecSpan& batch, ExecResult* result = temp.get(); } - // TODO(wesm): clean this up to have less duplication - if (out->is_array_data()) { - ArrayData* output = out->array_data().get(); - if (is_dictionary_type::value) { - const ExecValue& dict_from = has_result ? 
result : batch[1]; - if (dict_from.is_scalar()) { - output->dictionary = checked_cast(*dict_from.scalar) - .value.dictionary->data(); - } else { - output->dictionary = dict_from.array.ToArrayData()->dictionary; - } - } - CopyValues(result, /*in_offset=*/0, batch.length, - output->GetMutableValues(0, 0), - output->GetMutableValues(1, 0), output->offset); - } else { - // ArraySpan - ArraySpan* output = out->array_span_mutable(); - if (is_dictionary_type::value) { - const ExecValue& dict_from = has_result ? result : batch[1]; - output->child_data.resize(1); - if (dict_from.is_scalar()) { - output->child_data[0].SetMembers( - *checked_cast(*dict_from.scalar) - .value.dictionary->data()); - } else { - output->child_data[0] = dict_from.array; - } + // Only input types of non-fixed length (which cannot be pre-allocated) + // will save the output data in ArrayData. And make sure the FixedLength + // types must be output in ArraySpan. + static_assert(is_fixed_width(Type::type_id)); + DCHECK(out->is_array_span()); + + ArraySpan* output = out->array_span_mutable(); + if (is_dictionary_type::value) { + const ExecValue& dict_from = has_result ? result : batch[1]; + output->child_data.resize(1); + if (dict_from.is_scalar()) { + output->child_data[0].SetMembers( + *checked_cast(*dict_from.scalar) + .value.dictionary->data()); + } else { + output->child_data[0] = dict_from.array; } - CopyValues(result, /*in_offset=*/0, batch.length, - output->GetValues(0, 0), output->GetValues(1, 0), - output->offset); } + CopyValues(result, /*in_offset=*/0, batch.length, + output->GetValues(0, 0), output->GetValues(1, 0), + output->offset); return Status::OK(); } @@ -1847,6 +1836,48 @@ struct CaseWhenFunctor> { } }; +// TODO(GH-41453): a more efficient implementation for list-views is possible +template +struct CaseWhenFunctor> { + using offset_type = typename Type::offset_type; + using BuilderType = typename TypeTraits::BuilderType; + static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + /// TODO(wesm): should this be a DCHECK? 
Or checked elsewhere + if (batch[0].null_count() > 0) { + return Status::Invalid("cond struct must not have outer nulls"); + } + if (batch[0].is_scalar()) { + return ExecVarWidthScalarCaseWhen(ctx, batch, out); + } + return ExecArray(ctx, batch, out); + } + + static Status ExecArray(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + return ExecVarWidthArrayCaseWhen( + ctx, batch, out, + // ReserveData + [&](ArrayBuilder* raw_builder) { + auto builder = checked_cast(raw_builder); + auto child_builder = builder->value_builder(); + + int64_t reservation = 0; + for (int arg = 1; arg < batch.num_values(); arg++) { + const ExecValue& source = batch[arg]; + if (!source.is_array()) { + const auto& scalar = checked_cast(*source.scalar); + if (!scalar.value) continue; + reservation = + std::max(reservation, batch.length * scalar.value->length()); + } else { + const ArraySpan& array = source.array; + reservation = std::max(reservation, array.child_data[0].length); + } + } + return child_builder->Reserve(reservation); + }); + } +}; + // No-op reserve function, pulled out to avoid apparent miscompilation on MinGW Status ReserveNoData(ArrayBuilder*) { return Status::OK(); } @@ -2712,6 +2743,25 @@ void AddBinaryCaseWhenKernels(const std::shared_ptr& scalar_fu } } +template +void AddNestedCaseWhenKernel(const std::shared_ptr& scalar_function) { + AddCaseWhenKernel(scalar_function, ArrowNestedType::type_id, + CaseWhenFunctor::Exec); +} + +void AddNestedCaseWhenKernels(const std::shared_ptr& scalar_function) { + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); +} + void AddCoalesceKernel(const std::shared_ptr& scalar_function, detail::GetTypeId get_id, ArrayKernelExec exec) { ScalarKernel kernel(KernelSignature::Make({InputType(get_id.id)}, FirstType, @@ -2731,6 +2781,25 @@ void AddPrimitiveCoalesceKernels(const std::shared_ptr& scalar_f } } +template +void AddNestedCoalesceKernel(const std::shared_ptr& scalar_function) { + AddCoalesceKernel(scalar_function, ArrowNestedType::type_id, + CoalesceFunctor::Exec); +} + +void AddNestedCoalesceKernels(const std::shared_ptr& scalar_function) { + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); +} + void AddChooseKernel(const std::shared_ptr& scalar_function, detail::GetTypeId get_id, ArrayKernelExec exec) { ScalarKernel kernel(KernelSignature::Make({Type::INT64, InputType(get_id.id)}, LastType, @@ -2822,15 +2891,7 @@ void RegisterScalarIfElse(FunctionRegistry* registry) { AddCaseWhenKernel(func, Type::DECIMAL128, CaseWhenFunctor::Exec); AddCaseWhenKernel(func, Type::DECIMAL256, CaseWhenFunctor::Exec); AddBinaryCaseWhenKernels(func, BaseBinaryTypes()); - AddCaseWhenKernel(func, Type::FIXED_SIZE_LIST, - CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::LIST, CaseWhenFunctor::Exec); - 
AddCaseWhenKernel(func, Type::LARGE_LIST, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::MAP, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::STRUCT, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::DENSE_UNION, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::SPARSE_UNION, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::DICTIONARY, CaseWhenFunctor::Exec); + AddNestedCaseWhenKernels(func); DCHECK_OK(registry->AddFunction(std::move(func))); } { @@ -2848,15 +2909,7 @@ void RegisterScalarIfElse(FunctionRegistry* registry) { for (const auto& ty : BaseBinaryTypes()) { AddCoalesceKernel(func, ty, GenerateTypeAgnosticVarBinaryBase(ty)); } - AddCoalesceKernel(func, Type::FIXED_SIZE_LIST, - CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::LIST, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::LARGE_LIST, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::MAP, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::STRUCT, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::DENSE_UNION, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::SPARSE_UNION, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::DICTIONARY, CoalesceFunctor::Exec); + AddNestedCoalesceKernels(func); DCHECK_OK(registry->AddFunction(std::move(func))); } { diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc index 58bc560f52842..5988908853d50 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc @@ -284,8 +284,11 @@ static void CaseWhenBench(benchmark::State& state) { state.SetItemsProcessed(state.iterations() * (len - offset)); } -static void CaseWhenBenchList(benchmark::State& state) { - auto type = list(int64()); +template +static void CaseWhenBenchList(benchmark::State& state, + const std::shared_ptr& type) { + using ArrayType = typename TypeTraits::ArrayType; + auto fld = field("", type); int64_t len = state.range(0); @@ -295,17 +298,17 @@ static void CaseWhenBenchList(benchmark::State& state) { auto cond_field = field("cond", boolean(), key_value_metadata({{"null_probability", "0.01"}})); - auto cond = rand.ArrayOf(*field("", struct_({cond_field, cond_field, cond_field}), - key_value_metadata({{"null_probability", "0.0"}})), - len); - auto val1 = rand.ArrayOf(*fld, len); - auto val2 = rand.ArrayOf(*fld, len); - auto val3 = rand.ArrayOf(*fld, len); - auto val4 = rand.ArrayOf(*fld, len); + auto cond = std::static_pointer_cast( + rand.ArrayOf(*field("", struct_({cond_field, cond_field, cond_field}), + key_value_metadata({{"null_probability", "0.0"}})), + len)) + ->Slice(offset); + auto val1 = std::static_pointer_cast(rand.ArrayOf(*fld, len))->Slice(offset); + auto val2 = std::static_pointer_cast(rand.ArrayOf(*fld, len))->Slice(offset); + auto val3 = std::static_pointer_cast(rand.ArrayOf(*fld, len))->Slice(offset); + auto val4 = std::static_pointer_cast(rand.ArrayOf(*fld, len))->Slice(offset); for (auto _ : state) { - ABORT_NOT_OK( - CaseWhen(cond->Slice(offset), {val1->Slice(offset), val2->Slice(offset), - val3->Slice(offset), val4->Slice(offset)})); + ABORT_NOT_OK(CaseWhen(cond, {val1, val2, val3, val4})); } // Set bytes processed to ~length of output @@ -372,6 +375,21 @@ static void CaseWhenBenchStringContiguous(benchmark::State& state) { return CaseWhenBenchContiguous(state); } +template +static void CaseWhenBenchVarLengthListLike(benchmark::State& state) { + auto value_type = 
TypeTraits::type_singleton(); + auto list_type = std::make_shared(value_type); + return CaseWhenBenchList(state, list_type); +} + +static void CaseWhenBenchListInt64(benchmark::State& state) { + return CaseWhenBenchVarLengthListLike(state); +} + +static void CaseWhenBenchListViewInt64(benchmark::State& state) { + CaseWhenBenchVarLengthListLike(state); +} + struct CoalesceParams { int64_t length; int64_t num_arguments; @@ -533,9 +551,11 @@ BENCHMARK(CaseWhenBench64)->Args({kNumItems, 99}); BENCHMARK(CaseWhenBench64Contiguous)->Args({kNumItems, 0}); BENCHMARK(CaseWhenBench64Contiguous)->Args({kNumItems, 99}); -// CaseWhen: Lists -BENCHMARK(CaseWhenBenchList)->Args({kFewItems, 0}); -BENCHMARK(CaseWhenBenchList)->Args({kFewItems, 99}); +// CaseWhen: List-like types +BENCHMARK(CaseWhenBenchListInt64)->Args({kFewItems, 0}); +BENCHMARK(CaseWhenBenchListInt64)->Args({kFewItems, 99}); +BENCHMARK(CaseWhenBenchListViewInt64)->Args({kFewItems, 0}); +BENCHMARK(CaseWhenBenchListViewInt64)->Args({kFewItems, 99}); // CaseWhen: Strings BENCHMARK(CaseWhenBenchString)->Args({kFewItems, 0}); diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index c4c46b5efe84d..9a0ca325277dc 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -896,6 +896,21 @@ TEST_F(TestIfElseKernel, ParameterizedTypes) { {cond, ArrayFromJSON(type0, "[0]"), ArrayFromJSON(type1, "[1]")})); } +TEST_F(TestIfElseKernel, MapNested) { + auto type = map(int64(), utf8()); + CheckWithDifferentShapes( + ArrayFromJSON(boolean(), "[true, true, false, false]"), + ArrayFromJSON(type, R"([null, [[2, "foo"], [4, null]], [[3, "test"]], []])"), + ArrayFromJSON(type, R"([[[1, "b"]], [[2, "c"]], [[7, "abc"]], null])"), + ArrayFromJSON(type, R"([null, [[2, "foo"], [4, null]], [[7, "abc"]], null])")); + + CheckWithDifferentShapes( + ArrayFromJSON(boolean(), "[null, null, null, null]"), + ArrayFromJSON(type, R"([null, [[1, "c"]], [[4, null]], [[6, "ok"]]])"), + ArrayFromJSON(type, R"([[[-1, null]], [[3, "c"]], null, [[6, "ok"]]])"), + ArrayFromJSON(type, R"([null, null, null, null])")); +} + template class TestIfElseUnion : public ::testing::Test {}; @@ -1920,7 +1935,7 @@ TYPED_TEST(TestCaseWhenBinary, Random) { template class TestCaseWhenList : public ::testing::Test {}; -TYPED_TEST_SUITE(TestCaseWhenList, ListArrowTypes); +TYPED_TEST_SUITE(TestCaseWhenList, ListAndListViewArrowTypes); TYPED_TEST(TestCaseWhenList, ListOfString) { auto type = std::make_shared(utf8()); @@ -2555,7 +2570,7 @@ class TestCoalesceList : public ::testing::Test {}; TYPED_TEST_SUITE(TestCoalesceNumeric, IfElseNumericBasedTypes); TYPED_TEST_SUITE(TestCoalesceBinary, BaseBinaryArrowTypes); -TYPED_TEST_SUITE(TestCoalesceList, ListArrowTypes); +TYPED_TEST_SUITE(TestCoalesceList, ListAndListViewArrowTypes); TYPED_TEST(TestCoalesceNumeric, Basics) { auto type = default_type_instance(); diff --git a/cpp/src/arrow/compute/kernels/scalar_nested.cc b/cpp/src/arrow/compute/kernels/scalar_nested.cc index 733ab9c0dc287..b99f065a0b158 100644 --- a/cpp/src/arrow/compute/kernels/scalar_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_nested.cc @@ -23,6 +23,7 @@ #include "arrow/compute/api_scalar.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/result.h" +#include "arrow/type_fwd.h" #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_generate.h" @@ -41,10 +42,17 @@ Status 
ListValueLength(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou const ArraySpan& arr = batch[0].array; ArraySpan* out_arr = out->array_span_mutable(); auto out_values = out_arr->GetValues(1); - const offset_type* offsets = arr.GetValues(1); - // Offsets are always well-defined and monotonic, even for null values - for (int64_t i = 0; i < arr.length; ++i) { - *out_values++ = offsets[i + 1] - offsets[i]; + if (is_list_view(*arr.type)) { + const auto* sizes = arr.GetValues(2); + if (arr.length > 0) { + memcpy(out_values, sizes, arr.length * sizeof(offset_type)); + } + } else { + const offset_type* offsets = arr.GetValues(1); + // Offsets are always well-defined and monotonic, even for null values + for (int64_t i = 0; i < arr.length; ++i) { + *out_values++ = offsets[i + 1] - offsets[i]; + } } return Status::OK(); } @@ -59,6 +67,30 @@ Status FixedSizeListValueLength(KernelContext* ctx, const ExecSpan& batch, return Status::OK(); } +template +void AddListValueLengthKernel(ScalarFunction* func, + const std::shared_ptr& out_type) { + auto in_type = {InputType(InListType::type_id)}; + ScalarKernel kernel(in_type, out_type, ListValueLength); + DCHECK_OK(func->AddKernel(std::move(kernel))); +} + +template <> +void AddListValueLengthKernel( + ScalarFunction* func, const std::shared_ptr& out_type) { + auto in_type = {InputType(Type::FIXED_SIZE_LIST)}; + ScalarKernel kernel(in_type, out_type, FixedSizeListValueLength); + DCHECK_OK(func->AddKernel(std::move(kernel))); +} + +void AddListValueLengthKernels(ScalarFunction* func) { + AddListValueLengthKernel(func, int32()); + AddListValueLengthKernel(func, int64()); + AddListValueLengthKernel(func, int32()); + AddListValueLengthKernel(func, int64()); + AddListValueLengthKernel(func, int32()); +} + const FunctionDoc list_value_length_doc{ "Compute list lengths", ("`lists` must have a list-like type.\n" @@ -399,6 +431,8 @@ void AddListElementKernels(ScalarFunction* func) { void AddListElementKernels(ScalarFunction* func) { AddListElementKernels(func); AddListElementKernels(func); + AddListElementKernels(func); + AddListElementKernels(func); AddListElementKernels(func); } @@ -824,12 +858,7 @@ const FunctionDoc map_lookup_doc{ void RegisterScalarNested(FunctionRegistry* registry) { auto list_value_length = std::make_shared( "list_value_length", Arity::Unary(), list_value_length_doc); - DCHECK_OK(list_value_length->AddKernel({InputType(Type::LIST)}, int32(), - ListValueLength)); - DCHECK_OK(list_value_length->AddKernel({InputType(Type::FIXED_SIZE_LIST)}, int32(), - FixedSizeListValueLength)); - DCHECK_OK(list_value_length->AddKernel({InputType(Type::LARGE_LIST)}, int64(), - ListValueLength)); + AddListValueLengthKernels(list_value_length.get()); DCHECK_OK(registry->AddFunction(std::move(list_value_length))); auto list_element = diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc index a72ec99620b82..32bea8246954d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc @@ -30,11 +30,21 @@ namespace arrow { namespace compute { static std::shared_ptr GetOffsetType(const DataType& type) { - return type.id() == Type::LIST ? 
int32() : int64(); + switch (type.id()) { + case Type::LIST: + case Type::LIST_VIEW: + return int32(); + case Type::LARGE_LIST: + case Type::LARGE_LIST_VIEW: + return int64(); + default: + Unreachable("Unexpected type"); + } } TEST(TestScalarNested, ListValueLength) { - for (auto ty : {list(int32()), large_list(int32())}) { + for (auto ty : {list(int32()), large_list(int32()), list_view(int32()), + large_list_view(int32())}) { CheckScalarUnary("list_value_length", ty, "[[0, null, 1], null, [2, 3], []]", GetOffsetType(*ty), "[3, null, 2, 0]"); } @@ -47,7 +57,8 @@ TEST(TestScalarNested, ListValueLength) { TEST(TestScalarNested, ListElementNonFixedListWithNulls) { auto sample = "[[7, 5, 81], [6, null, 4, 7, 8], [3, 12, 2, 0], [1, 9], null]"; for (auto ty : NumericTypes()) { - for (auto list_type : {list(ty), large_list(ty)}) { + for (auto list_type : + {list(ty), large_list(ty), list_view(ty), large_list_view(ty)}) { auto input = ArrayFromJSON(list_type, sample); auto null_input = ArrayFromJSON(list_type, "[null]"); for (auto index_type : IntTypes()) { diff --git a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc index 038e623b43c53..762b666c6a148 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc @@ -1315,7 +1315,7 @@ struct RegexSubstringMatcher { const MatchSubstringOptions& options, bool is_utf8 = true, bool literal = false) { auto matcher = std::make_unique(options, is_utf8, literal); RETURN_NOT_OK(RegexStatus(matcher->regex_match_)); - return std::move(matcher); + return matcher; } explicit RegexSubstringMatcher(const MatchSubstringOptions& options, @@ -1685,7 +1685,7 @@ struct FindSubstringRegex { bool is_utf8 = true, bool literal = false) { auto matcher = FindSubstringRegex(options, is_utf8, literal); RETURN_NOT_OK(RegexStatus(*matcher.regex_match_)); - return std::move(matcher); + return matcher; } explicit FindSubstringRegex(const MatchSubstringOptions& options, bool is_utf8 = true, @@ -1832,7 +1832,7 @@ struct CountSubstringRegex { bool is_utf8 = true, bool literal = false) { CountSubstringRegex counter(options, is_utf8, literal); RETURN_NOT_OK(RegexStatus(*counter.regex_match_)); - return std::move(counter); + return counter; } template @@ -2055,7 +2055,7 @@ struct RegexSubstringReplacer { std::move(replacement_error)); } - return std::move(replacer); + return replacer; } // Using RE2::FindAndConsume we can only find the pattern if it is a group, therefore @@ -2203,7 +2203,7 @@ struct ExtractRegexData { } data.group_names.emplace_back(item->second); } - return std::move(data); + return data; } Result ResolveOutputType(const std::vector& types) const { diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc index 23d0fd18d578a..2217787663a63 100644 --- a/cpp/src/arrow/compute/kernels/test_util.cc +++ b/cpp/src/arrow/compute/kernels/test_util.cc @@ -31,6 +31,7 @@ #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/table.h" +#include "arrow/testing/fixed_width_test_util.h" #include "arrow/testing/gtest_util.h" namespace arrow { diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc b/cpp/src/arrow/compute/kernels/vector_hash.cc index 800deba3a5ed2..44bb7372c3f68 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash.cc @@ -530,7 +530,8 @@ Result> HashInit(KernelContext* ctx, auto result = std::make_unique(args.inputs[0].GetSharedPtr(), 
args.options, ctx->memory_pool()); RETURN_NOT_OK(result->Reset()); - return std::move(result); + // R build with openSUSE155 requires an explicit unique_ptr construction + return std::unique_ptr(std::move(result)); } template diff --git a/cpp/src/arrow/compute/kernels/vector_nested.cc b/cpp/src/arrow/compute/kernels/vector_nested.cc index 08930e589f7b4..8c77c261c6a98 100644 --- a/cpp/src/arrow/compute/kernels/vector_nested.cc +++ b/cpp/src/arrow/compute/kernels/vector_nested.cc @@ -18,6 +18,7 @@ // Vector kernels involving nested types #include "arrow/array/array_base.h" +#include "arrow/compute/api_vector.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/result.h" #include "arrow/visit_type_inline.h" @@ -29,8 +30,13 @@ namespace { template Status ListFlatten(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + auto recursive = OptionsWrapper::Get(ctx).recursive; typename TypeTraits::ArrayType list_array(batch[0].array.ToArrayData()); - ARROW_ASSIGN_OR_RAISE(auto result, list_array.Flatten(ctx->memory_pool())); + + auto pool = ctx->memory_pool(); + ARROW_ASSIGN_OR_RAISE(auto result, (recursive ? list_array.FlattenRecursively(pool) + : list_array.Flatten(pool))); + out->value = std::move(result->data()); return Status::OK(); } @@ -107,10 +113,15 @@ struct ListParentIndicesArray { const FunctionDoc list_flatten_doc( "Flatten list values", - ("`lists` must have a list-like type.\n" - "Return an array with the top list level flattened.\n" - "Top-level null values in `lists` do not emit anything in the input."), - {"lists"}); + ("`lists` must have a list-like type (lists, list-views, and\n" + "fixed-size lists).\n" + "Return an array with the top list level flattened unless\n" + "`recursive` is set to true in ListFlattenOptions. 
When that\n" + "is the case, flattening happens recursively until a non-list\n" + "array is formed.\n" + "\n" + "Null list values do not emit anything to the output."), + {"lists"}, "ListFlattenOptions"); const FunctionDoc list_parent_indices_doc( "Compute parent indices of nested list values", @@ -153,17 +164,34 @@ class ListParentIndicesFunction : public MetaFunction { } }; +const ListFlattenOptions* GetDefaultListFlattenOptions() { + static const auto kDefaultListFlattenOptions = ListFlattenOptions::Defaults(); + return &kDefaultListFlattenOptions; +} + +template <typename InListType> +void AddBaseListFlattenKernels(VectorFunction* func) { + auto in_type = {InputType(InListType::type_id)}; + auto out_type = OutputType(ListValuesType); + VectorKernel kernel(in_type, out_type, ListFlatten<InListType>, + OptionsWrapper<ListFlattenOptions>::Init); + DCHECK_OK(func->AddKernel(std::move(kernel))); +} + +void AddBaseListFlattenKernels(VectorFunction* func) { + AddBaseListFlattenKernels<ListType>(func); + AddBaseListFlattenKernels<LargeListType>(func); + AddBaseListFlattenKernels<ListViewType>(func); + AddBaseListFlattenKernels<LargeListViewType>(func); + AddBaseListFlattenKernels<FixedSizeListType>(func); +} + } // namespace void RegisterVectorNested(FunctionRegistry* registry) { - auto flatten = - std::make_shared<VectorFunction>("list_flatten", Arity::Unary(), list_flatten_doc); - DCHECK_OK(flatten->AddKernel({Type::LIST}, OutputType(ListValuesType), - ListFlatten<ListType>)); - DCHECK_OK(flatten->AddKernel({Type::FIXED_SIZE_LIST}, OutputType(ListValuesType), - ListFlatten<FixedSizeListType>)); - DCHECK_OK(flatten->AddKernel({Type::LARGE_LIST}, OutputType(ListValuesType), - ListFlatten<LargeListType>)); + auto flatten = std::make_shared<VectorFunction>( + "list_flatten", Arity::Unary(), list_flatten_doc, GetDefaultListFlattenOptions()); + AddBaseListFlattenKernels(flatten.get()); DCHECK_OK(registry->AddFunction(std::move(flatten))); DCHECK_OK(registry->AddFunction(std::make_shared<ListParentIndicesFunction>())); diff --git a/cpp/src/arrow/compute/kernels/vector_nested_test.cc b/cpp/src/arrow/compute/kernels/vector_nested_test.cc index eef1b6835ffb5..56604ebd16cc0 100644 --- a/cpp/src/arrow/compute/kernels/vector_nested_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_nested_test.cc @@ -19,6 +19,7 @@ #include "arrow/chunked_array.h" #include "arrow/compute/api.h" +#include "arrow/compute/api_vector.h" #include "arrow/compute/kernels/test_util.h" #include "arrow/result.h" #include "arrow/testing/gtest_util.h" @@ -29,38 +30,113 @@ namespace compute { using arrow::internal::checked_cast; -TEST(TestVectorNested, ListFlatten) { - for (auto ty : {list(int16()), large_list(int16())}) { - auto input = ArrayFromJSON(ty, "[[0, null, 1], null, [2, 3], []]"); - auto expected = ArrayFromJSON(int16(), "[0, null, 1, 2, 3]"); +using ListAndListViewTypes = + ::testing::Types<ListType, LargeListType, ListViewType, LargeListViewType>; + +// ---------------------------------------------------------------------- +// [Large]List and [Large]ListView tests +template <typename T> +class TestVectorNestedSpecialized : public ::testing::Test { + public: + using TypeClass = T; + + void SetUp() override { + value_type_ = int16(); + type_ = std::make_shared<T>(value_type_); + } + + public: + void TestListFlatten() { + auto input = ArrayFromJSON(type_, "[[0, null, 1], null, [2, 3], []]"); + auto expected = ArrayFromJSON(value_type_, "[0, null, 1, 2, 3]"); CheckVectorUnary("list_flatten", input, expected); // Construct a list with a non-empty null slot auto tweaked = TweakValidityBit(input, 0, false); - expected = ArrayFromJSON(int16(), "[2, 3]"); + expected = ArrayFromJSON(value_type_, "[2, 3]"); CheckVectorUnary("list_flatten", tweaked, expected); } -} -TEST(TestVectorNested, ListFlattenNulls) { - const auto ty = 
list(int32()); - auto input = ArrayFromJSON(ty, "[null, null]"); - auto expected = ArrayFromJSON(int32(), "[]"); - CheckVectorUnary("list_flatten", input, expected); -} + void TestListFlattenNulls() { + value_type_ = int32(); + type_ = std::make_shared(value_type_); + auto input = ArrayFromJSON(type_, "[null, null]"); + auto expected = ArrayFromJSON(value_type_, "[]"); + CheckVectorUnary("list_flatten", input, expected); + } -TEST(TestVectorNested, ListFlattenChunkedArray) { - for (auto ty : {list(int16()), large_list(int16())}) { - ARROW_SCOPED_TRACE(ty->ToString()); - auto input = ChunkedArrayFromJSON(ty, {"[[0, null, 1], null]", "[[2, 3], []]"}); - auto expected = ChunkedArrayFromJSON(int16(), {"[0, null, 1]", "[2, 3]"}); + void TestListFlattenChunkedArray() { + ARROW_SCOPED_TRACE(type_->ToString()); + auto input = ChunkedArrayFromJSON(type_, {"[[0, null, 1], null]", "[[2, 3], []]"}); + auto expected = ChunkedArrayFromJSON(value_type_, {"[0, null, 1]", "[2, 3]"}); CheckVectorUnary("list_flatten", input, expected); ARROW_SCOPED_TRACE("empty"); - input = ChunkedArrayFromJSON(ty, {}); - expected = ChunkedArrayFromJSON(int16(), {}); + input = ChunkedArrayFromJSON(type_, {}); + expected = ChunkedArrayFromJSON(value_type_, {}); CheckVectorUnary("list_flatten", input, expected); } + + void TestListFlattenRecursively() { + auto inner_type = std::make_shared(value_type_); + type_ = std::make_shared(inner_type); + + ListFlattenOptions opts; + opts.recursive = true; + + // List types with two nesting levels: list> + auto input = ArrayFromJSON(type_, R"([ + [[0, 1, 2], null, [3, null]], + [null], + [[2, 9], [4], [], [6, 5]] + ])"); + auto expected = ArrayFromJSON(value_type_, "[0, 1, 2, 3, null, 2, 9, 4, 6, 5]"); + CheckVectorUnary("list_flatten", input, expected, &opts); + + // Empty nested list should flatten until non-list type is reached + input = ArrayFromJSON(type_, R"([null])"); + expected = ArrayFromJSON(value_type_, "[]"); + CheckVectorUnary("list_flatten", input, expected, &opts); + + // List types with three nesting levels: list>> + type_ = std::make_shared(std::make_shared(fixed_size_list(value_type_, 2))); + input = ArrayFromJSON(type_, R"([ + [ + [[null, 0]], + [[3, 7], null] + ], + [ + [[4, null], [5, 8]], + [[8, null]], + null + ], + [ + null + ] + ])"); + expected = ArrayFromJSON(value_type_, "[null, 0, 3, 7, 4, null, 5, 8, 8, null]"); + CheckVectorUnary("list_flatten", input, expected, &opts); + } + + protected: + std::shared_ptr type_; + std::shared_ptr value_type_; +}; + +TYPED_TEST_SUITE(TestVectorNestedSpecialized, ListAndListViewTypes); + +TYPED_TEST(TestVectorNestedSpecialized, ListFlatten) { this->TestListFlatten(); } + +TYPED_TEST(TestVectorNestedSpecialized, ListFlattenNulls) { + this->TestListFlattenNulls(); +} + +TYPED_TEST(TestVectorNestedSpecialized, ListFlattenChunkedArray) { + this->TestListFlattenChunkedArray(); +} + +TYPED_TEST(TestVectorNestedSpecialized, ListFlattenRecursively) { + this->TestListFlattenRecursively(); } TEST(TestVectorNested, ListFlattenFixedSizeList) { @@ -92,6 +168,21 @@ TEST(TestVectorNested, ListFlattenFixedSizeListNulls) { CheckVectorUnary("list_flatten", input, expected); } +TEST(TestVectorNested, ListFlattenFixedSizeListRecursively) { + ListFlattenOptions opts; + opts.recursive = true; + + auto inner_type = fixed_size_list(int32(), 2); + auto type = fixed_size_list(inner_type, 2); + auto input = ArrayFromJSON(type, R"([ + [[0, 1], [null, 3]], + [[7, null], [2, 5]], + [null, null] + ])"); + auto expected = ArrayFromJSON(int32(), "[0, 1, 
null, 3, 7, null, 2, 5]"); + CheckVectorUnary("list_flatten", input, expected, &opts); +} + TEST(TestVectorNested, ListParentIndices) { for (auto ty : {list(int16()), large_list(int16())}) { auto input = ArrayFromJSON(ty, "[[0, null, 1], null, [2, 3], [], [4, 5]]"); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc index 8825d697fdf77..5e24331fe96f2 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc @@ -22,6 +22,7 @@ #include #include +#include "arrow/array/concatenate.h" #include "arrow/array/data.h" #include "arrow/buffer_builder.h" #include "arrow/chunked_array.h" @@ -40,6 +41,7 @@ #include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" +#include "arrow/util/fixed_width_internal.h" namespace arrow { @@ -158,9 +160,11 @@ class PrimitiveFilterImpl { PrimitiveFilterImpl(const ArraySpan& values, const ArraySpan& filter, FilterOptions::NullSelectionBehavior null_selection, ArrayData* out_arr) - : byte_width_(values.type->byte_width()), + : byte_width_(util::FixedWidthInBytes(*values.type)), values_is_valid_(values.buffers[0].data), - values_data_(values.buffers[1].data), + // No offset applied for boolean because it's a bitmap + values_data_(kIsBoolean ? values.buffers[1].data + : util::OffsetPointerOfFixedByteWidthValues(values)), values_null_count_(values.null_count), values_offset_(values.offset), values_length_(values.length), @@ -169,17 +173,13 @@ class PrimitiveFilterImpl { if constexpr (kByteWidth >= 0 && !kIsBoolean) { DCHECK_EQ(kByteWidth, byte_width_); } - if constexpr (!kIsBoolean) { - // No offset applied for boolean because it's a bitmap - values_data_ += values.offset * byte_width(); - } + DCHECK_EQ(out_arr->offset, 0); if (out_arr->buffers[0] != nullptr) { // May be unallocated if neither filter nor values contain nulls out_is_valid_ = out_arr->buffers[0]->mutable_data(); } - out_data_ = out_arr->buffers[1]->mutable_data(); - DCHECK_EQ(out_arr->offset, 0); + out_data_ = util::MutableFixedWidthValuesPointer(out_arr); out_length_ = out_arr->length; out_position_ = 0; } @@ -416,7 +416,7 @@ class PrimitiveFilterImpl { out_position_ += length; } - constexpr int32_t byte_width() const { + constexpr int64_t byte_width() const { if constexpr (kByteWidth >= 0) { return kByteWidth; } else { @@ -425,7 +425,7 @@ class PrimitiveFilterImpl { } private: - int32_t byte_width_; + int64_t byte_width_; const uint8_t* values_is_valid_; const uint8_t* values_data_; int64_t values_null_count_; @@ -439,6 +439,8 @@ class PrimitiveFilterImpl { int64_t out_position_; }; +} // namespace + Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const ArraySpan& values = batch[0].array; const ArraySpan& filter = batch[1].array; @@ -468,9 +470,10 @@ Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult // validity bitmap. 
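// --- Illustrative aside (not part of the patch) ----------------------------
// Why the boolean specialization of PrimitiveFilterImpl keeps values_offset_
// while every other width pre-bakes the slice offset into the data pointer:
// fixed byte widths can be addressed per byte, but booleans are bit-packed.
// A minimal sketch, with `values` standing in for an ArraySpan of a sliced
// input and `i` a logical position:
//
//   const int64_t byte_width = util::FixedWidthInBytes(*values.type);
//   const uint8_t* data = values.buffers[1].data + values.offset * byte_width;
//   // vs. boolean: the slice offset must stay bit-granular
//   bool bit = bit_util::GetBit(values.buffers[1].data, values.offset + i);
// ----------------------------------------------------------------------------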
const bool allocate_validity = values.null_count != 0 || !filter_null_count_is_zero; - const int bit_width = values.type->bit_width(); - RETURN_NOT_OK(PreallocatePrimitiveArrayData(ctx, output_length, bit_width, - allocate_validity, out_arr)); + DCHECK(util::IsFixedWidthLike(values)); + const int64_t bit_width = util::FixedWidthInBits(*values.type); + RETURN_NOT_OK(util::internal::PreallocateFixedWidthArrayData( + ctx, output_length, /*source=*/values, allocate_validity, out_arr)); switch (bit_width) { case 1: @@ -505,6 +508,8 @@ Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult return Status::OK(); } +namespace { + // ---------------------------------------------------------------------- // Optimized filter for base binary types (32-bit and 64-bit) @@ -924,12 +929,26 @@ Result> FilterRecordBatch(const RecordBatch& batch, return Status::Invalid("Filter inputs must all be the same length"); } - // Convert filter to selection vector/indices and use Take + // Fetch filter const auto& filter_opts = *static_cast(options); - ARROW_ASSIGN_OR_RAISE( - std::shared_ptr indices, - GetTakeIndices(*filter.array(), filter_opts.null_selection_behavior, - ctx->memory_pool())); + ArrayData filter_array; + switch (filter.kind()) { + case Datum::ARRAY: + filter_array = *filter.array(); + break; + case Datum::CHUNKED_ARRAY: { + ARROW_ASSIGN_OR_RAISE(auto combined, Concatenate(filter.chunked_array()->chunks())); + filter_array = *combined->data(); + break; + } + default: + return Status::TypeError("Filter should be array-like"); + } + + // Convert filter to selection vector/indices and use Take + ARROW_ASSIGN_OR_RAISE(std::shared_ptr indices, + GetTakeIndices(filter_array, filter_opts.null_selection_behavior, + ctx->memory_pool())); std::vector> columns(batch.num_columns()); for (int i = 0; i < batch.num_columns(); ++i) { ARROW_ASSIGN_OR_RAISE(Datum out, Take(batch.column(i)->data(), Datum(indices), @@ -1038,7 +1057,6 @@ class FilterMetaFunction : public MetaFunction { } if (args[0].kind() == Datum::RECORD_BATCH) { - auto values_batch = args[0].record_batch(); ARROW_ASSIGN_OR_RAISE( std::shared_ptr out_batch, FilterRecordBatch(*args[0].record_batch(), args[1], options, ctx)); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc index a0fe2808e3e4e..2ba660e49ac38 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc @@ -37,6 +37,7 @@ #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_util.h" +#include "arrow/util/fixed_width_internal.h" #include "arrow/util/int_util.h" #include "arrow/util/logging.h" #include "arrow/util/ree_util.h" @@ -65,24 +66,6 @@ void RegisterSelectionFunction(const std::string& name, FunctionDoc doc, DCHECK_OK(registry->AddFunction(std::move(func))); } -Status PreallocatePrimitiveArrayData(KernelContext* ctx, int64_t length, int bit_width, - bool allocate_validity, ArrayData* out) { - // Preallocate memory - out->length = length; - out->buffers.resize(2); - - if (allocate_validity) { - ARROW_ASSIGN_OR_RAISE(out->buffers[0], ctx->AllocateBitmap(length)); - } - if (bit_width == 1) { - ARROW_ASSIGN_OR_RAISE(out->buffers[1], ctx->AllocateBitmap(length)); - } else { - ARROW_ASSIGN_OR_RAISE(out->buffers[1], - ctx->Allocate(bit_util::BytesForBits(length * bit_width))); - } - return Status::OK(); -} - namespace { /// \brief Iterate over a REE filter, 
emitting ranges of a plain values array that @@ -909,6 +892,20 @@ Status LargeListFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult } Status FSLFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + const ArraySpan& values = batch[0].array; + + // If a FixedSizeList wraps a fixed-width type we can, in some cases, use + // PrimitiveFilterExec for a fixed-size list array. + if (util::IsFixedWidthLike(values, + /*force_null_count=*/true, + /*exclude_bool_and_dictionary=*/true)) { + const auto byte_width = util::FixedWidthInBytes(*values.type); + // 0 is a valid byte width for FixedSizeList, but PrimitiveFilterExec + // might not handle it correctly. + if (byte_width > 0) { + return PrimitiveFilterExec(ctx, batch, out); + } + } return FilterExec(ctx, batch, out); } @@ -968,6 +965,29 @@ Status LargeListTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* } Status FSLTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + const ArraySpan& values = batch[0].array; + + // If a FixedSizeList wraps a fixed-width type we can, in some cases, use + // PrimitiveTakeExec for a fixed-size list array. + if (util::IsFixedWidthLike(values, + /*force_null_count=*/true, + /*exclude_bool_and_dictionary=*/true)) { + const auto byte_width = util::FixedWidthInBytes(*values.type); + // Additionally, PrimitiveTakeExec is only implemented for specific byte widths. + // TODO(GH-41301): Extend PrimitiveTakeExec for any fixed-width type. + switch (byte_width) { + case 1: + case 2: + case 4: + case 8: + case 16: + case 32: + return PrimitiveTakeExec(ctx, batch, out); + default: + break; // fallback to TakeExec + } + } + return TakeExec(ctx, batch, out); } diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.h b/cpp/src/arrow/compute/kernels/vector_selection_internal.h index 95f3e51cd67e3..a169f4b38a2b8 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.h @@ -45,12 +45,6 @@ void RegisterSelectionFunction(const std::string& name, FunctionDoc doc, const FunctionOptions* default_options, FunctionRegistry* registry); -/// \brief Allocate an ArrayData for a primitive array with a given length and bit width -/// -/// \param[in] bit_width 1 or a multiple of 8 -Status PreallocatePrimitiveArrayData(KernelContext* ctx, int64_t length, int bit_width, - bool allocate_validity, ArrayData* out); - /// \brief Callback type for VisitPlainxREEFilterOutputSegments. /// /// position is the logical position in the values array relative to its offset. 
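// --- Illustrative aside (not part of the patch) ----------------------------
// The fixed-size-list fast paths above work because the flat byte width
// multiplies through the nesting levels. A sketch, assuming
// util::FixedWidthInBytes composes sizes recursively:
//
//   auto ty = fixed_size_list(fixed_size_list(int16(), 2), 4);
//   // util::FixedWidthInBytes(*ty) == 2 (int16) * 2 * 4 == 16, so takes on
//   // this type dispatch to PrimitiveTakeExec's `case 16`.
//
// Widths outside {1, 2, 4, 8, 16, 32} (e.g. fixed_size_list(int8(), 3))
// fall back to the generic TakeExec until GH-41301 lands.
// ----------------------------------------------------------------------------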
@@ -70,6 +64,7 @@ void VisitPlainxREEFilterOutputSegments( FilterOptions::NullSelectionBehavior null_selection, const EmitREEFilterSegment& emit_segment); +Status PrimitiveFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status ListFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status LargeListFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status FSLFilterExec(KernelContext*, const ExecSpan&, ExecResult*); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc index 5cd3710828485..1a9af0efcd700 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc @@ -37,6 +37,7 @@ #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_util.h" +#include "arrow/util/fixed_width_internal.h" #include "arrow/util/int_util.h" #include "arrow/util/ree_util.h" @@ -323,7 +324,7 @@ namespace { using TakeState = OptionsWrapper; // ---------------------------------------------------------------------- -// Implement optimized take for primitive types from boolean to 1/2/4/8-byte +// Implement optimized take for primitive types from boolean to 1/2/4/8/16/32-byte // C-type based types. Use common implementation for every byte width and only // generate code for unsigned integer indices, since after boundschecking to // check for negative numbers in the indices we can safely reinterpret_cast @@ -333,16 +334,20 @@ using TakeState = OptionsWrapper; /// use the logical Arrow type but rather the physical C type. This way we /// only generate one take function for each byte width. /// -/// This function assumes that the indices have been boundschecked. +/// Also note that this function can also handle fixed-size-list arrays if +/// they fit the criteria described in fixed_width_internal.h, so use the +/// function defined in that file to access values and destination pointers +/// and DO NOT ASSUME `values.type()` is a primitive type. +/// +/// \pre the indices have been boundschecked template struct PrimitiveTakeImpl { static constexpr int kValueWidth = ValueWidthConstant::value; static void Exec(const ArraySpan& values, const ArraySpan& indices, ArrayData* out_arr) { - DCHECK_EQ(values.type->byte_width(), kValueWidth); - const auto* values_data = - values.GetValues(1, 0) + kValueWidth * values.offset; + DCHECK_EQ(util::FixedWidthInBytes(*values.type), kValueWidth); + const auto* values_data = util::OffsetPointerOfFixedByteWidthValues(values); const uint8_t* values_is_valid = values.buffers[0].data; auto values_offset = values.offset; @@ -350,16 +355,15 @@ struct PrimitiveTakeImpl { const uint8_t* indices_is_valid = indices.buffers[0].data; auto indices_offset = indices.offset; - auto out = out_arr->GetMutableValues(1, 0) + kValueWidth * out_arr->offset; + DCHECK_EQ(out_arr->offset, 0); + auto* out = util::MutableFixedWidthValuesPointer(out_arr); auto out_is_valid = out_arr->buffers[0]->mutable_data(); - auto out_offset = out_arr->offset; - DCHECK_EQ(out_offset, 0); // If either the values or indices have nulls, we preemptively zero out the // out validity bitmap so that we don't have to use ClearBit in each // iteration for nulls. 
if (values.null_count != 0 || indices.null_count != 0) { - bit_util::SetBitsTo(out_is_valid, out_offset, indices.length, false); + bit_util::SetBitsTo(out_is_valid, 0, indices.length, false); } auto WriteValue = [&](int64_t position) { @@ -386,7 +390,7 @@ struct PrimitiveTakeImpl { valid_count += block.popcount; if (block.popcount == block.length) { // Fastest path: neither values nor index nulls - bit_util::SetBitsTo(out_is_valid, out_offset + position, block.length, true); + bit_util::SetBitsTo(out_is_valid, position, block.length, true); for (int64_t i = 0; i < block.length; ++i) { WriteValue(position); ++position; @@ -396,7 +400,7 @@ struct PrimitiveTakeImpl { for (int64_t i = 0; i < block.length; ++i) { if (bit_util::GetBit(indices_is_valid, indices_offset + position)) { // index is not null - bit_util::SetBit(out_is_valid, out_offset + position); + bit_util::SetBit(out_is_valid, position); WriteValue(position); } else { WriteZero(position); @@ -416,7 +420,7 @@ struct PrimitiveTakeImpl { values_offset + indices_data[position])) { // value is not null WriteValue(position); - bit_util::SetBit(out_is_valid, out_offset + position); + bit_util::SetBit(out_is_valid, position); ++valid_count; } else { WriteZero(position); @@ -433,7 +437,7 @@ struct PrimitiveTakeImpl { values_offset + indices_data[position])) { // index is not null && value is not null WriteValue(position); - bit_util::SetBit(out_is_valid, out_offset + position); + bit_util::SetBit(out_is_valid, position); ++valid_count; } else { WriteZero(position); @@ -584,14 +588,16 @@ Status PrimitiveTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ArrayData* out_arr = out->array_data().get(); - const int bit_width = values.type->bit_width(); + DCHECK(util::IsFixedWidthLike(values)); + const int64_t bit_width = util::FixedWidthInBits(*values.type); // TODO: When neither values nor indices contain nulls, we can skip // allocating the validity bitmap altogether and save time and space. A // streamlined PrimitiveTakeImpl would need to be written that skips all // interactions with the output validity bitmap, though. 
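// --- Illustrative aside (not part of the patch) ----------------------------
// PreallocateFixedWidthArrayData generalizes the helper this patch removes;
// the old PreallocatePrimitiveArrayData was essentially:
//
//   out->length = length;
//   out->buffers.resize(2);
//   if (allocate_validity) {
//     ARROW_ASSIGN_OR_RAISE(out->buffers[0], ctx->AllocateBitmap(length));
//   }
//   if (bit_width == 1) {
//     ARROW_ASSIGN_OR_RAISE(out->buffers[1], ctx->AllocateBitmap(length));
//   } else {
//     ARROW_ASSIGN_OR_RAISE(out->buffers[1],
//                           ctx->Allocate(bit_util::BytesForBits(length * bit_width)));
//   }
//
// The replacement also takes a `source` array so fixed-size-list inputs,
// whose flat values live in child data, can be preallocated the same way.
// ----------------------------------------------------------------------------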
- RETURN_NOT_OK(PreallocatePrimitiveArrayData(ctx, indices.length, bit_width, - /*allocate_validity=*/true, out_arr)); + RETURN_NOT_OK(util::internal::PreallocateFixedWidthArrayData( + ctx, indices.length, /*source=*/values, + /*allocate_validity=*/true, out_arr)); switch (bit_width) { case 1: TakeIndexDispatch(values, indices, out_arr); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index ec94b328ea361..6261fa2daec5f 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -23,12 +23,14 @@ #include #include +#include "arrow/array/builder_nested.h" #include "arrow/array/concatenate.h" #include "arrow/chunked_array.h" #include "arrow/compute/api.h" #include "arrow/compute/kernels/test_util.h" #include "arrow/table.h" #include "arrow/testing/builder.h" +#include "arrow/testing/fixed_width_test_util.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" @@ -726,7 +728,37 @@ TEST_F(TestFilterKernelWithLargeList, FilterListInt32) { "[[1,2], null, null]"); } -class TestFilterKernelWithFixedSizeList : public TestFilterKernel {}; +class TestFilterKernelWithFixedSizeList : public TestFilterKernel { + protected: + std::vector> five_length_filters_ = { + ArrayFromJSON(boolean(), "[false, false, false, false, false]"), + ArrayFromJSON(boolean(), "[true, true, true, true, true]"), + ArrayFromJSON(boolean(), "[false, true, true, false, true]"), + ArrayFromJSON(boolean(), "[null, true, null, false, true]"), + }; + + void AssertFilterOnNestedLists(const std::shared_ptr& inner_type, + const std::vector& list_sizes) { + using NLG = ::arrow::util::internal::NestedListGenerator; + constexpr int64_t kLength = 5; + // Create two equivalent lists: one as a FixedSizeList and another as a List. + ASSERT_OK_AND_ASSIGN(auto fsl_list, + NLG::NestedFSLArray(inner_type, list_sizes, kLength)); + ASSERT_OK_AND_ASSIGN(auto list, + NLG::NestedListArray(inner_type, list_sizes, kLength)); + + ARROW_SCOPED_TRACE("CheckTakeOnNestedLists of type `", *fsl_list->type(), "`"); + + for (auto& filter : five_length_filters_) { + // Use the Filter on ListType as the reference implementation. 
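// --- Illustrative aside (not part of the patch) ----------------------------
// The reference-implementation trick used by AssertFilterOnNestedLists: build
// the same data as a list<...> and a fixed_size_list<...>, filter the list
// version with the well-tested list kernel, then cast back to the FSL type to
// obtain the expected output. Hypothetical example for filter [0, 1, 1, null]
// with emit_null semantics:
//
//   list:  [[0, 1], [2, 3], [4, 5], [6, 7]]  ->  [[2, 3], [4, 5], null]
//   cast:  list<int32> -> fixed_size_list<int32, 2>  (all lengths match)
//
// Any disagreement between the FSL fast path and this oracle fails the test.
// ----------------------------------------------------------------------------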
+ ASSERT_OK_AND_ASSIGN(auto expected_list, + Filter(*list, *filter, /*options=*/emit_null_)); + ASSERT_OK_AND_ASSIGN(auto expected_fsl, Cast(expected_list, fsl_list->type())); + auto expected_fsl_array = expected_fsl.make_array(); + this->AssertFilter(fsl_list, filter, expected_fsl_array); + } + } +}; TEST_F(TestFilterKernelWithFixedSizeList, FilterFixedSizeListInt32) { std::string list_json = "[null, [1, null, 3], [4, 5, 6], [7, 8, null]]"; @@ -740,6 +772,33 @@ TEST_F(TestFilterKernelWithFixedSizeList, FilterFixedSizeListInt32) { "[[1, null, 3], [7, 8, null]]"); } +TEST_F(TestFilterKernelWithFixedSizeList, FilterFixedSizeListVarWidth) { + std::string list_json = + R"([["zero", "one", ""], ["two", "", "three"], ["four", "five", "six"], ["seven", "eight", ""]])"; + this->AssertFilter(fixed_size_list(utf8(), 3), list_json, "[0, 0, 0, 0]", "[]"); + this->AssertFilter(fixed_size_list(utf8(), 3), list_json, "[0, 1, 1, null]", + R"([["two", "", "three"], ["four", "five", "six"], null])"); + this->AssertFilter(fixed_size_list(utf8(), 3), list_json, "[0, 0, 1, null]", + R"([["four", "five", "six"], null])"); + this->AssertFilter(fixed_size_list(utf8(), 3), list_json, "[1, 1, 1, 1]", list_json); + this->AssertFilter(fixed_size_list(utf8(), 3), list_json, "[0, 1, 0, 1]", + R"([["two", "", "three"], ["seven", "eight", ""]])"); +} + +TEST_F(TestFilterKernelWithFixedSizeList, FilterFixedSizeListModuloNesting) { + using NLG = ::arrow::util::internal::NestedListGenerator; + const std::vector> value_types = { + int16(), + int32(), + int64(), + }; + NLG::VisitAllNestedListConfigurations( + value_types, [this](const std::shared_ptr& inner_type, + const std::vector& list_sizes) { + this->AssertFilterOnNestedLists(inner_type, list_sizes); + }); +} + class TestFilterKernelWithMap : public TestFilterKernel {}; TEST_F(TestFilterKernelWithMap, FilterMapStringToInt32) { @@ -1034,29 +1093,34 @@ Status TakeJSON(const std::shared_ptr& type, const std::string& values .Value(out); } +void DoCheckTake(const std::shared_ptr& values, + const std::shared_ptr& indices, + const std::shared_ptr& expected) { + AssertTakeArrays(values, indices, expected); + + // Check sliced values + ASSERT_OK_AND_ASSIGN(auto values_filler, MakeArrayOfNull(values->type(), 2)); + ASSERT_OK_AND_ASSIGN(auto values_sliced, + Concatenate({values_filler, values, values_filler})); + values_sliced = values_sliced->Slice(2, values->length()); + AssertTakeArrays(values_sliced, indices, expected); + + // Check sliced indices + ASSERT_OK_AND_ASSIGN(auto zero, MakeScalar(indices->type(), int8_t{0})); + ASSERT_OK_AND_ASSIGN(auto indices_filler, MakeArrayFromScalar(*zero, 3)); + ASSERT_OK_AND_ASSIGN(auto indices_sliced, + Concatenate({indices_filler, indices, indices_filler})); + indices_sliced = indices_sliced->Slice(3, indices->length()); + AssertTakeArrays(values, indices_sliced, expected); +} + void CheckTake(const std::shared_ptr& type, const std::string& values_json, const std::string& indices_json, const std::string& expected_json) { auto values = ArrayFromJSON(type, values_json); auto expected = ArrayFromJSON(type, expected_json); - for (auto index_type : {int8(), uint32()}) { auto indices = ArrayFromJSON(index_type, indices_json); - AssertTakeArrays(values, indices, expected); - - // Check sliced values - ASSERT_OK_AND_ASSIGN(auto values_filler, MakeArrayOfNull(type, 2)); - ASSERT_OK_AND_ASSIGN(auto values_sliced, - Concatenate({values_filler, values, values_filler})); - values_sliced = values_sliced->Slice(2, values->length()); - 
AssertTakeArrays(values_sliced, indices, expected); - - // Check sliced indices - ASSERT_OK_AND_ASSIGN(auto zero, MakeScalar(index_type, int8_t{0})); - ASSERT_OK_AND_ASSIGN(auto indices_filler, MakeArrayFromScalar(*zero, 3)); - ASSERT_OK_AND_ASSIGN(auto indices_sliced, - Concatenate({indices_filler, indices, indices_filler})); - indices_sliced = indices_sliced->Slice(3, indices->length()); - AssertTakeArrays(values, indices_sliced, expected); + DoCheckTake(values, indices, expected); } } @@ -1427,7 +1491,25 @@ TEST_F(TestTakeKernelWithLargeList, TakeLargeListInt32) { CheckTake(large_list(int32()), list_json, "[null, 1, 2, 0]", "[null, [1,2], null, []]"); } -class TestTakeKernelWithFixedSizeList : public TestTakeKernelTyped {}; +class TestTakeKernelWithFixedSizeList : public TestTakeKernelTyped { + protected: + void CheckTakeOnNestedLists(const std::shared_ptr& inner_type, + const std::vector& list_sizes, int64_t length) { + using NLG = ::arrow::util::internal::NestedListGenerator; + // Create two equivalent lists: one as a FixedSizeList and another as a List. + ASSERT_OK_AND_ASSIGN(auto fsl_list, + NLG::NestedFSLArray(inner_type, list_sizes, length)); + ASSERT_OK_AND_ASSIGN(auto list, NLG::NestedListArray(inner_type, list_sizes, length)); + + ARROW_SCOPED_TRACE("CheckTakeOnNestedLists of type `", *fsl_list->type(), "`"); + + auto indices = ArrayFromJSON(int64(), "[1, 2, 4]"); + // Use the Take on ListType as the reference implementation. + ASSERT_OK_AND_ASSIGN(auto expected_list, Take(*list, *indices)); + ASSERT_OK_AND_ASSIGN(auto expected_fsl, Cast(*expected_list, fsl_list->type())); + DoCheckTake(fsl_list, indices, expected_fsl); + } +}; TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListInt32) { std::string list_json = "[null, [1, null, 3], [4, 5, 6], [7, 8, null]]"; @@ -1449,6 +1531,42 @@ TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListInt32) { "[0, 1, 0]"); } +TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListVarWidth) { + std::string list_json = + R"([["zero", "one", ""], ["two", "", "three"], ["four", "five", "six"], ["seven", "eight", ""]])"; + CheckTake(fixed_size_list(utf8(), 3), list_json, "[]", "[]"); + CheckTake(fixed_size_list(utf8(), 3), list_json, "[3, 2, 1]", + R"([["seven", "eight", ""], ["four", "five", "six"], ["two", "", "three"]])"); + CheckTake(fixed_size_list(utf8(), 3), list_json, "[null, 2, 0]", + R"([null, ["four", "five", "six"], ["zero", "one", ""]])"); + CheckTake(fixed_size_list(utf8(), 3), list_json, R"([null, null])", "[null, null]"); + CheckTake( + fixed_size_list(utf8(), 3), list_json, "[3, 0, 0,3]", + R"([["seven", "eight", ""], ["zero", "one", ""], ["zero", "one", ""], ["seven", "eight", ""]])"); + CheckTake(fixed_size_list(utf8(), 3), list_json, "[0, 1, 2, 3]", list_json); + CheckTake(fixed_size_list(utf8(), 3), list_json, "[2, 2, 2, 2, 2, 2, 1]", + R"([ + ["four", "five", "six"], ["four", "five", "six"], + ["four", "five", "six"], ["four", "five", "six"], + ["four", "five", "six"], ["four", "five", "six"], + ["two", "", "three"] + ])"); +} + +TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListModuloNesting) { + using NLG = ::arrow::util::internal::NestedListGenerator; + const std::vector> value_types = { + int16(), + int32(), + int64(), + }; + NLG::VisitAllNestedListConfigurations( + value_types, [this](const std::shared_ptr& inner_type, + const std::vector& list_sizes) { + this->CheckTakeOnNestedLists(inner_type, list_sizes, /*length=*/5); + }); +} + class TestTakeKernelWithMap : public TestTakeKernelTyped {}; 
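// --- Illustrative aside (not part of the patch) ----------------------------
// DoCheckTake's padding dance exists to force non-zero ArraySpan offsets, the
// exact regime the new OffsetPointerOfFixedByteWidthValues paths must honor.
// Schematically:
//
//   filler = MakeArrayOfNull(values->type(), 2);          // [null, null]
//   padded = Concatenate({filler, values, filler});
//   sliced = padded->Slice(2, values->length());          // offset == 2
//
// `sliced` is logically equal to `values`, so Take must give identical
// results; a kernel that ignored the offset would read the filler nulls.
// ----------------------------------------------------------------------------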
TEST_F(TestTakeKernelWithMap, TakeMapStringToInt32) { diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index db2023ef04cad..ad22fa8d365c4 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -747,15 +747,13 @@ class TableSorter { auto& comparator = comparator_; const auto& first_sort_key = sort_keys_[0]; - ChunkLocation left_loc{0, 0}; - ChunkLocation right_loc{0, 0}; + ChunkLocation left_loc; + ChunkLocation right_loc; std::merge(nulls_begin, nulls_middle, nulls_middle, nulls_end, temp_indices, [&](uint64_t left, uint64_t right) { // First column is either null or nan - left_loc = - left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); - right_loc = - right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); + left_loc = left_resolver_.ResolveWithHint(left, /*hint=*/left_loc); + right_loc = right_resolver_.ResolveWithHint(right, /*hint=*/right_loc); auto chunk_left = first_sort_key.GetChunk(left_loc); auto chunk_right = first_sort_key.GetChunk(right_loc); const auto left_is_null = chunk_left.IsNull(); @@ -786,15 +784,13 @@ class TableSorter { // Untyped implementation auto& comparator = comparator_; - ChunkLocation left_loc{0, 0}; - ChunkLocation right_loc{0, 0}; + ChunkLocation left_loc; + ChunkLocation right_loc; std::merge(nulls_begin, nulls_middle, nulls_middle, nulls_end, temp_indices, [&](uint64_t left, uint64_t right) { // First column is always null - left_loc = - left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); - right_loc = - right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); + left_loc = left_resolver_.ResolveWithHint(left, /*hint=*/left_loc); + right_loc = right_resolver_.ResolveWithHint(right, /*hint=*/right_loc); return comparator.Compare(left_loc, right_loc, 1); }); // Copy back temp area into main buffer @@ -812,15 +808,13 @@ class TableSorter { auto& comparator = comparator_; const auto& first_sort_key = sort_keys_[0]; - ChunkLocation left_loc{0, 0}; - ChunkLocation right_loc{0, 0}; + ChunkLocation left_loc; + ChunkLocation right_loc; std::merge(range_begin, range_middle, range_middle, range_end, temp_indices, [&](uint64_t left, uint64_t right) { // Both values are never null nor NaN. - left_loc = - left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); - right_loc = - right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); + left_loc = left_resolver_.ResolveWithHint(left, /*hint=*/left_loc); + right_loc = right_resolver_.ResolveWithHint(right, /*hint=*/right_loc); auto chunk_left = first_sort_key.GetChunk(left_loc); auto chunk_right = first_sort_key.GetChunk(right_loc); DCHECK(!chunk_left.IsNull()); diff --git a/cpp/src/arrow/compute/key_hash_internal.h b/cpp/src/arrow/compute/key_hash_internal.h index 7d226f52086b1..1f25beb0e1622 100644 --- a/cpp/src/arrow/compute/key_hash_internal.h +++ b/cpp/src/arrow/compute/key_hash_internal.h @@ -48,6 +48,16 @@ class ARROW_EXPORT Hashing32 { static void HashMultiColumn(const std::vector& cols, LightContext* ctx, uint32_t* out_hash); + // Clarify the max temp stack usage for HashBatch, which might be necessary for the + // caller to be aware of at compile time to reserve enough stack size in advance. The + // HashBatch implementation uses one uint32 temp vector as a buffer for hash, one uint16 + // temp vector as a buffer for null indices and one uint32 temp vector as a buffer for + // null hash, all are of size kMiniBatchLength. 
Plus extra kMiniBatchLength to cope with + // stack padding and aligning. + static constexpr auto kHashBatchTempStackUsage = + (sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint32_t) + /*extra=*/1) * + util::MiniBatch::kMiniBatchLength; + static Status HashBatch(const ExecBatch& key_batch, uint32_t* hashes, std::vector& column_arrays, int64_t hardware_flags, util::TempVectorStack* temp_stack, @@ -161,6 +171,15 @@ class ARROW_EXPORT Hashing64 { static void HashMultiColumn(const std::vector& cols, LightContext* ctx, uint64_t* hashes); + // Clarify the max temp stack usage for HashBatch, which might be necessary for the + // caller to be aware of at compile time to reserve enough stack size in advance. The + // HashBatch implementation uses one uint16 temp vector as a buffer for null indices and + // one uint64 temp vector as a buffer for null hash, all are of size kMiniBatchLength. + // Plus extra kMiniBatchLength to cope with stack padding and aligning. + static constexpr auto kHashBatchTempStackUsage = + (sizeof(uint16_t) + sizeof(uint64_t) + /*extra=*/1) * + util::MiniBatch::kMiniBatchLength; + static Status HashBatch(const ExecBatch& key_batch, uint64_t* hashes, std::vector& column_arrays, int64_t hardware_flags, util::TempVectorStack* temp_stack, diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc index 4e5d869cb7db6..fdf6d2125850a 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -25,12 +25,16 @@ #include "arrow/array/builder_binary.h" #include "arrow/compute/key_hash_internal.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" #include "arrow/testing/util.h" #include "arrow/util/cpu_info.h" #include "arrow/util/pcg_random.h" namespace arrow { +using arrow::random::RandomArrayGenerator; +using arrow::util::MiniBatch; +using arrow::util::TempVectorStack; using internal::checked_pointer_cast; using internal::CpuInfo; @@ -156,7 +160,7 @@ class TestVectorHash { std::vector temp_buffer; temp_buffer.resize(mini_batch_size * 4); - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { const auto hardware_flags = hardware_flags_for_testing[i]; if (use_32bit_hash) { if (!use_varlen_input) { @@ -192,7 +196,7 @@ class TestVectorHash { // Verify that all implementations (scalar, SIMD) give the same hashes // const auto& hashes_scalar64 = hashes64[0]; - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { for (int j = 0; j < num_rows; ++j) { ASSERT_EQ(hashes64[i][j], hashes_scalar64[j]) << "scalar and simd approaches yielded different hashes"; @@ -280,7 +284,7 @@ void HashFixedLengthFrom(int key_length, int num_rows, int start_row) { std::vector temp_buffer; temp_buffer.resize(mini_batch_size * 4); - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { const auto hardware_flags = hardware_flags_for_testing[i]; Hashing32::HashFixed(hardware_flags, /*combine_hashes=*/false, num_rows_to_hash, key_length, @@ -292,7 +296,7 @@ void HashFixedLengthFrom(int key_length, int num_rows, int start_row) { } // Verify that all implementations (scalar, SIMD) give the same hashes. 
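// --- Illustrative aside (not part of the patch) ----------------------------
// Plugging in kMiniBatchLength == 1024 (its current value), the bounds
// declared above work out to:
//
//   Hashing32::kHashBatchTempStackUsage == (4 + 2 + 4 + 1) * 1024 == 11264
//   Hashing64::kHashBatchTempStackUsage == (2 + 8 + 1) * 1024     == 11264
//
// so a TempVectorStack Init'ed with this many bytes fits the per-mini-batch
// hash / null-index / null-hash scratch vectors plus one extra
// kMiniBatchLength of padding and alignment slack.
// ----------------------------------------------------------------------------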
- for (int i = 1; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 1; i < hardware_flags_for_testing.size(); ++i) { for (int j = 0; j < num_rows_to_hash; ++j) { ASSERT_EQ(hashes32[i][j], hashes32[0][j]) << "scalar and simd approaches yielded different 32-bit hashes"; @@ -311,5 +315,52 @@ TEST(VectorHash, FixedLengthTailByteSafety) { HashFixedLengthFrom(/*key_length=*/19, /*num_rows=*/64, /*start_row=*/63); } +// Make sure that Hashing32/64::HashBatch uses no more stack space than declared in +// Hashing32/64::kHashBatchTempStackUsage. +TEST(VectorHash, HashBatchTempStackUsage) { + for (auto num_rows : + {0, 1, MiniBatch::kMiniBatchLength, MiniBatch::kMiniBatchLength * 64}) { + SCOPED_TRACE("num_rows = " + std::to_string(num_rows)); + + MemoryPool* pool = default_memory_pool(); + RandomArrayGenerator gen(42); + + auto column = gen.Int8(num_rows, 0, 127); + ExecBatch batch({column}, num_rows); + + std::vector column_arrays; + ASSERT_OK(ColumnArraysFromExecBatch(batch, &column_arrays)); + + const auto hardware_flags_for_testing = HardwareFlagsForTesting(); + ASSERT_GT(hardware_flags_for_testing.size(), 0); + + { + std::vector hashes(num_rows); + TempVectorStack stack; + ASSERT_OK(stack.Init(pool, Hashing32::kHashBatchTempStackUsage)); + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { + SCOPED_TRACE("hashing32 for hardware flags = " + + std::to_string(hardware_flags_for_testing[i])); + ASSERT_OK(Hashing32::HashBatch(batch, hashes.data(), column_arrays, + hardware_flags_for_testing[i], &stack, + /*start_rows=*/0, num_rows)); + } + } + + { + std::vector hashes(num_rows); + TempVectorStack stack; + ASSERT_OK(stack.Init(pool, Hashing64::kHashBatchTempStackUsage)); + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { + SCOPED_TRACE("hashing64 for hardware flags = " + + std::to_string(hardware_flags_for_testing[i])); + ASSERT_OK(Hashing64::HashBatch(batch, hashes.data(), column_arrays, + hardware_flags_for_testing[i], &stack, + /*start_rows=*/0, num_rows)); + } + } + } +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/key_map_internal.h b/cpp/src/arrow/compute/key_map_internal.h index 8e06dc83483aa..a5e784a9e4463 100644 --- a/cpp/src/arrow/compute/key_map_internal.h +++ b/cpp/src/arrow/compute/key_map_internal.h @@ -21,6 +21,7 @@ #include #include "arrow/compute/util.h" +#include "arrow/compute/util_internal.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/compute/light_array_internal.h b/cpp/src/arrow/compute/light_array_internal.h index 67de71bf56c92..995c4211998e0 100644 --- a/cpp/src/arrow/compute/light_array_internal.h +++ b/cpp/src/arrow/compute/light_array_internal.h @@ -22,6 +22,7 @@ #include "arrow/array.h" #include "arrow/compute/exec.h" #include "arrow/compute/util.h" +#include "arrow/compute/util_internal.h" #include "arrow/type.h" #include "arrow/util/cpu_info.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index 08f36ee606025..cc02d489d138f 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -20,6 +20,7 @@ #include #include +#include "arrow/memory_pool.h" #include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" #include "arrow/type.h" diff --git a/cpp/src/arrow/compute/row/CMakeLists.txt b/cpp/src/arrow/compute/row/CMakeLists.txt index 6ae982dbaf3a7..ef03c767f974e 100644 --- 
a/cpp/src/arrow/compute/row/CMakeLists.txt +++ b/cpp/src/arrow/compute/row/CMakeLists.txt @@ -19,3 +19,5 @@ # in a row-major order. arrow_install_all_headers("arrow/compute/row") + +add_arrow_benchmark(grouper_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/row/compare_internal.cc b/cpp/src/arrow/compute/row/compare_internal.cc index 078a8287c71c0..98aea9011266c 100644 --- a/cpp/src/arrow/compute/row/compare_internal.cc +++ b/cpp/src/arrow/compute/row/compare_internal.cc @@ -36,22 +36,22 @@ void KeyCompare::NullUpdateColumnToRow(uint32_t id_col, uint32_t num_rows_to_com const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, - uint8_t* match_bytevector, - bool are_cols_in_encoding_order) { + bool are_cols_in_encoding_order, + uint8_t* match_bytevector) { if (!rows.has_any_nulls(ctx) && !col.data(0)) { return; } uint32_t num_processed = 0; #if defined(ARROW_HAVE_RUNTIME_AVX2) if (ctx->has_avx2()) { - num_processed = NullUpdateColumnToRow_avx2(use_selection, id_col, num_rows_to_compare, - sel_left_maybe_null, left_to_right_map, - ctx, col, rows, match_bytevector); + num_processed = NullUpdateColumnToRow_avx2( + use_selection, id_col, num_rows_to_compare, sel_left_maybe_null, + left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, match_bytevector); } #endif - uint32_t null_bit_id = - are_cols_in_encoding_order ? id_col : rows.metadata().pos_after_encoding(id_col); + const uint32_t null_bit_id = + ColIdInEncodingOrder(rows, id_col, are_cols_in_encoding_order); if (!col.data(0)) { // Remove rows from the result for which the column value is a null @@ -363,10 +363,9 @@ void KeyCompare::CompareColumnsToRows( continue; } - uint32_t offset_within_row = rows.metadata().encoded_field_offset( - are_cols_in_encoding_order - ? static_cast(icol) - : rows.metadata().pos_after_encoding(static_cast(icol))); + uint32_t offset_within_row = + rows.metadata().encoded_field_offset(ColIdInEncodingOrder( + rows, static_cast(icol), are_cols_in_encoding_order)); if (col.metadata().is_fixed_length) { if (sel_left_maybe_null) { CompareBinaryColumnToRow( @@ -375,9 +374,8 @@ void KeyCompare::CompareColumnsToRows( is_first_column ? match_bytevector_A : match_bytevector_B); NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, - left_to_right_map, ctx, col, rows, - is_first_column ? match_bytevector_A : match_bytevector_B, - are_cols_in_encoding_order); + left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, + is_first_column ? match_bytevector_A : match_bytevector_B); } else { // Version without using selection vector CompareBinaryColumnToRow( @@ -386,9 +384,8 @@ void KeyCompare::CompareColumnsToRows( is_first_column ? match_bytevector_A : match_bytevector_B); NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, - left_to_right_map, ctx, col, rows, - is_first_column ? match_bytevector_A : match_bytevector_B, - are_cols_in_encoding_order); + left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, + is_first_column ? match_bytevector_A : match_bytevector_B); } if (!is_first_column) { AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B); @@ -414,9 +411,8 @@ void KeyCompare::CompareColumnsToRows( } NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, - left_to_right_map, ctx, col, rows, - is_first_column ? 
match_bytevector_A : match_bytevector_B, - are_cols_in_encoding_order); + left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, + is_first_column ? match_bytevector_A : match_bytevector_B); } else { if (ivarbinary == 0) { CompareVarBinaryColumnToRow( @@ -429,9 +425,8 @@ void KeyCompare::CompareColumnsToRows( } NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, - left_to_right_map, ctx, col, rows, - is_first_column ? match_bytevector_A : match_bytevector_B, - are_cols_in_encoding_order); + left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, + is_first_column ? match_bytevector_A : match_bytevector_B); } if (!is_first_column) { AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B); diff --git a/cpp/src/arrow/compute/row/compare_internal.h b/cpp/src/arrow/compute/row/compare_internal.h index b039ca97ff978..a5a109b0b516a 100644 --- a/cpp/src/arrow/compute/row/compare_internal.h +++ b/cpp/src/arrow/compute/row/compare_internal.h @@ -32,6 +32,16 @@ namespace compute { class ARROW_EXPORT KeyCompare { public: + // Clarify the max temp stack usage for CompareColumnsToRows, which might be necessary + // for the caller to be aware of (possibly at compile time) to reserve enough stack size + // in advance. The CompareColumnsToRows implementation uses three uint8 temp vectors as + // buffers for match vectors, all are of size num_rows. Plus extra kMiniBatchLength to + // cope with stack padding and aligning. + constexpr static int64_t CompareColumnsToRowsTempStackUsage(int64_t num_rows) { + return (sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint8_t)) * num_rows + + /*extra=*/util::MiniBatch::kMiniBatchLength; + } + // Returns a single 16-bit selection vector of rows that failed comparison. // If there is input selection on the left, the resulting selection is a filtered image // of input selection. @@ -43,13 +53,19 @@ class ARROW_EXPORT KeyCompare { uint8_t* out_match_bitvector_maybe_null = NULLPTR); private: + static uint32_t ColIdInEncodingOrder(const RowTableImpl& rows, uint32_t id_col, + bool are_cols_in_encoding_order) { + return are_cols_in_encoding_order ? 
id_col + : rows.metadata().pos_after_encoding(id_col); + } + template static void NullUpdateColumnToRow(uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, - uint8_t* match_bytevector, - bool are_cols_in_encoding_order); + bool are_cols_in_encoding_order, + uint8_t* match_bytevector); template static void CompareBinaryColumnToRowHelper( @@ -92,7 +108,8 @@ class ARROW_EXPORT KeyCompare { static uint32_t NullUpdateColumnToRowImp_avx2( uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, - const RowTableImpl& rows, uint8_t* match_bytevector); + const RowTableImpl& rows, bool are_cols_in_encoding_order, + uint8_t* match_bytevector); template static uint32_t CompareBinaryColumnToRowHelper_avx2( @@ -118,13 +135,11 @@ class ARROW_EXPORT KeyCompare { static uint32_t AndByteVectors_avx2(uint32_t num_elements, uint8_t* bytevector_A, const uint8_t* bytevector_B); - static uint32_t NullUpdateColumnToRow_avx2(bool use_selection, uint32_t id_col, - uint32_t num_rows_to_compare, - const uint16_t* sel_left_maybe_null, - const uint32_t* left_to_right_map, - LightContext* ctx, const KeyColumnArray& col, - const RowTableImpl& rows, - uint8_t* match_bytevector); + static uint32_t NullUpdateColumnToRow_avx2( + bool use_selection, uint32_t id_col, uint32_t num_rows_to_compare, + const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, + LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, + bool are_cols_in_encoding_order, uint8_t* match_bytevector); static uint32_t CompareBinaryColumnToRow_avx2( bool use_selection, uint32_t offset_within_row, uint32_t num_rows_to_compare, diff --git a/cpp/src/arrow/compute/row/compare_internal_avx2.cc b/cpp/src/arrow/compute/row/compare_internal_avx2.cc index ff407c51b83cb..18f656a2e458d 100644 --- a/cpp/src/arrow/compute/row/compare_internal_avx2.cc +++ b/cpp/src/arrow/compute/row/compare_internal_avx2.cc @@ -39,12 +39,14 @@ template uint32_t KeyCompare::NullUpdateColumnToRowImp_avx2( uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, - const RowTableImpl& rows, uint8_t* match_bytevector) { + const RowTableImpl& rows, bool are_cols_in_encoding_order, + uint8_t* match_bytevector) { if (!rows.has_any_nulls(ctx) && !col.data(0)) { return num_rows_to_compare; } - uint32_t null_bit_id = rows.metadata().pos_after_encoding(id_col); + const uint32_t null_bit_id = + ColIdInEncodingOrder(rows, id_col, are_cols_in_encoding_order); if (!col.data(0)) { // Remove rows from the result for which the column value is a null @@ -569,7 +571,7 @@ uint32_t KeyCompare::NullUpdateColumnToRow_avx2( bool use_selection, uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, - uint8_t* match_bytevector) { + bool are_cols_in_encoding_order, uint8_t* match_bytevector) { int64_t num_rows_safe = TailSkipForSIMD::FixBitAccess(sizeof(uint32_t), col.length(), col.bit_offset(0)); if (sel_left_maybe_null) { @@ -580,13 +582,13 @@ uint32_t KeyCompare::NullUpdateColumnToRow_avx2( } if (use_selection) { - return NullUpdateColumnToRowImp_avx2(id_col, num_rows_to_compare, - 
sel_left_maybe_null, left_to_right_map, - ctx, col, rows, match_bytevector); + return NullUpdateColumnToRowImp_avx2( + id_col, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, + rows, are_cols_in_encoding_order, match_bytevector); } else { - return NullUpdateColumnToRowImp_avx2(id_col, num_rows_to_compare, - sel_left_maybe_null, left_to_right_map, - ctx, col, rows, match_bytevector); + return NullUpdateColumnToRowImp_avx2( + id_col, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, + rows, are_cols_in_encoding_order, match_bytevector); } } diff --git a/cpp/src/arrow/compute/row/compare_test.cc b/cpp/src/arrow/compute/row/compare_test.cc index 1d8562cd56d3c..4044049b10863 100644 --- a/cpp/src/arrow/compute/row/compare_test.cc +++ b/cpp/src/arrow/compute/row/compare_test.cc @@ -19,23 +19,26 @@ #include "arrow/compute/row/compare_internal.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" namespace arrow { namespace compute { using arrow::bit_util::BytesForBits; using arrow::internal::CpuInfo; +using arrow::random::RandomArrayGenerator; using arrow::util::MiniBatch; using arrow::util::TempVectorStack; // Specialized case for GH-39577. TEST(KeyCompare, CompareColumnsToRowsCuriousFSB) { int fsb_length = 9; + int num_rows = 7; + MemoryPool* pool = default_memory_pool(); TempVectorStack stack; - ASSERT_OK(stack.Init(pool, 8 * MiniBatch::kMiniBatchLength * sizeof(uint64_t))); + ASSERT_OK(stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows))); - int num_rows = 7; auto column_right = ArrayFromJSON(fixed_size_binary(fsb_length), R"([ "000000000", "111111111", @@ -106,5 +109,60 @@ TEST(KeyCompare, CompareColumnsToRowsCuriousFSB) { } } +// Make sure that KeyCompare::CompareColumnsToRows uses no more stack space than declared +// in KeyCompare::CompareColumnsToRowsTempStackUsage(). 
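// --- Illustrative aside (not part of the patch) ----------------------------
// Unlike the hash-batch constants, this bound is a function of the probe
// size: three uint8 match byte-vectors of num_rows each, plus one
// kMiniBatchLength of padding and alignment slack. For example:
//
//   CompareColumnsToRowsTempStackUsage(4096) == 3 * 4096 + 1024 == 13312
//
// which is how the test below sizes the TempVectorStack it hands to
// KeyCompare::CompareColumnsToRows.
// ----------------------------------------------------------------------------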
+TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) { + for (auto num_rows : + {0, 1, MiniBatch::kMiniBatchLength, MiniBatch::kMiniBatchLength * 64}) { + SCOPED_TRACE("num_rows = " + std::to_string(num_rows)); + + MemoryPool* pool = default_memory_pool(); + TempVectorStack stack; + ASSERT_OK(stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows))); + + RandomArrayGenerator gen(42); + + auto column_right = gen.Int8(num_rows, 0, 127); + ExecBatch batch_right({column_right}, num_rows); + + std::vector column_metadatas_right; + ASSERT_OK(ColumnMetadatasFromExecBatch(batch_right, &column_metadatas_right)); + + RowTableMetadata table_metadata_right; + table_metadata_right.FromColumnMetadataVector(column_metadatas_right, + sizeof(uint64_t), sizeof(uint64_t)); + + std::vector column_arrays_right; + ASSERT_OK(ColumnArraysFromExecBatch(batch_right, &column_arrays_right)); + + RowTableImpl row_table; + ASSERT_OK(row_table.Init(pool, table_metadata_right)); + + RowTableEncoder row_encoder; + row_encoder.Init(column_metadatas_right, sizeof(uint64_t), sizeof(uint64_t)); + row_encoder.PrepareEncodeSelected(0, num_rows, column_arrays_right); + + std::vector row_ids_right(num_rows); + std::iota(row_ids_right.begin(), row_ids_right.end(), 0); + ASSERT_OK(row_encoder.EncodeSelected(&row_table, num_rows, row_ids_right.data())); + + auto column_left = gen.Int8(num_rows, 0, 127); + ExecBatch batch_left({column_left}, num_rows); + std::vector column_arrays_left; + ASSERT_OK(ColumnArraysFromExecBatch(batch_left, &column_arrays_left)); + + std::vector row_ids_left(num_rows); + std::iota(row_ids_left.begin(), row_ids_left.end(), 0); + + LightContext ctx{CpuInfo::GetInstance()->hardware_flags(), &stack}; + + uint32_t num_rows_no_match; + std::vector row_ids_out(num_rows); + KeyCompare::CompareColumnsToRows(num_rows, NULLPTR, row_ids_left.data(), &ctx, + &num_rows_no_match, row_ids_out.data(), + column_arrays_left, row_table, true, NULLPTR); + } +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/row/grouper.cc b/cpp/src/arrow/compute/row/grouper.cc index 756c70967ac6f..45b9ad5971e80 100644 --- a/cpp/src/arrow/compute/row/grouper.cc +++ b/cpp/src/arrow/compute/row/grouper.cc @@ -217,18 +217,18 @@ struct SimpleKeySegmenter : public BaseRowSegmenter { struct AnyKeysSegmenter : public BaseRowSegmenter { static Result> Make( const std::vector& key_types, ExecContext* ctx) { - ARROW_RETURN_NOT_OK(Grouper::Make(key_types, ctx)); // check types - return std::make_unique(key_types, ctx); + ARROW_ASSIGN_OR_RAISE(auto grouper, Grouper::Make(key_types, ctx)); // check types + return std::make_unique(key_types, ctx, std::move(grouper)); } - AnyKeysSegmenter(const std::vector& key_types, ExecContext* ctx) + AnyKeysSegmenter(const std::vector& key_types, ExecContext* ctx, + std::unique_ptr grouper) : BaseRowSegmenter(key_types), - ctx_(ctx), - grouper_(nullptr), + grouper_(std::move(grouper)), save_group_id_(kNoGroupId) {} Status Reset() override { - grouper_ = nullptr; + ARROW_RETURN_NOT_OK(grouper_->Reset()); save_group_id_ = kNoGroupId; return Status::OK(); } @@ -245,7 +245,6 @@ struct AnyKeysSegmenter : public BaseRowSegmenter { // first row of a new segment to see if it extends the previous segment. 
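// --- Illustrative aside (not part of the patch) ----------------------------
// Before this change, AnyKeysSegmenter::Make built a Grouper purely as a
// key-type check and threw the result away:
//
//   ARROW_RETURN_NOT_OK(Grouper::Make(key_types, ctx));  // result discarded
//
// so GetNextSegment had to re-Make a grouper for every segment. Keeping the
// instance and pairing it with the new Grouper::Reset() turns that
// per-segment construction into a cheap state reset.
// ----------------------------------------------------------------------------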
template Result MapGroupIdAt(const Batch& batch, int64_t offset) { - if (!grouper_) return kNoGroupId; ARROW_ASSIGN_OR_RAISE(auto datum, grouper_->Consume(batch, offset, /*length=*/1)); if (!datum.is_array()) { @@ -264,9 +263,6 @@ struct AnyKeysSegmenter : public BaseRowSegmenter { if (offset == batch.length) { return MakeSegment(batch.length, offset, 0, kEmptyExtends); } - // ARROW-18311: make Grouper support Reset() - // so it can be reset instead of recreated below - // // the group id must be computed prior to resetting the grouper, since it is compared // to save_group_id_, and after resetting the grouper produces incomparable group ids ARROW_ASSIGN_OR_RAISE(auto group_id, MapGroupIdAt(batch, offset)); @@ -276,7 +272,7 @@ struct AnyKeysSegmenter : public BaseRowSegmenter { return extends; }; // resetting drops grouper's group-ids, freeing-up memory for the next segment - ARROW_ASSIGN_OR_RAISE(grouper_, Grouper::Make(key_types_, ctx_)); // TODO: reset it + ARROW_RETURN_NOT_OK(grouper_->Reset()); // GH-34475: cache the grouper-consume result across invocations of GetNextSegment ARROW_ASSIGN_OR_RAISE(auto datum, grouper_->Consume(batch, offset)); if (datum.is_array()) { @@ -299,7 +295,6 @@ struct AnyKeysSegmenter : public BaseRowSegmenter { } private: - ExecContext* const ctx_; std::unique_ptr grouper_; group_id_t save_group_id_; }; @@ -352,8 +347,9 @@ struct GrouperNoKeysImpl : Grouper { } std::shared_ptr array; RETURN_NOT_OK(builder->Finish(&array)); - return std::move(array); + return array; } + Status Reset() override { return Status::OK(); } Result Consume(const ExecSpan& batch, int64_t offset, int64_t length) override { ARROW_ASSIGN_OR_RAISE(auto array, MakeConstantGroupIdArray(length, 0)); return Datum(array); @@ -363,7 +359,7 @@ struct GrouperNoKeysImpl : Grouper { auto values = data->GetMutableValues(0); values[0] = 0; ExecBatch out({Datum(data)}, 1); - return std::move(out); + return out; } uint32_t num_groups() const override { return 1; } }; @@ -416,7 +412,15 @@ struct GrouperImpl : public Grouper { return Status::NotImplemented("Keys of type ", *key); } - return std::move(impl); + return impl; + } + + Status Reset() override { + map_.clear(); + offsets_.clear(); + key_bytes_.clear(); + num_groups_ = 0; + return Status::OK(); } Result Consume(const ExecSpan& batch, int64_t offset, int64_t length) override { @@ -592,10 +596,21 @@ struct GrouperFastImpl : public Grouper { impl->minibatch_hashes_.resize(impl->minibatch_size_max_ + kPaddingForSIMD / sizeof(uint32_t)); - return std::move(impl); + return impl; } - ~GrouperFastImpl() { map_.cleanup(); } + Status Reset() override { + ARROW_DCHECK_EQ(temp_stack_.AllocatedSize(), 0); + rows_.Clean(); + rows_minibatch_.Clean(); + map_.cleanup(); + RETURN_NOT_OK(map_.init(encode_ctx_.hardware_flags, ctx_->memory_pool())); + // TODO: It is now assumed that the dictionaries_ are identical to the first batch + // throughout the grouper's lifespan so no resetting is needed. But if we want to + // support different dictionaries for different batches, we need to reset the + // dictionaries_ here. 
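To illustrate the new Reset() contract from the caller's side, a minimal sketch (hypothetical driver code, not part of this diff; it assumes only the Grouper API shown here): one grouper is reused across segments via Reset() instead of being re-Make()d, as AnyKeysSegmenter now does.

    // Reuse one grouper across segments instead of recreating it per segment.
    arrow::Status ConsumeSegments(const std::vector<arrow::compute::ExecSpan>& segments,
                                  const std::vector<arrow::TypeHolder>& key_types,
                                  arrow::compute::ExecContext* ctx) {
      ARROW_ASSIGN_OR_RAISE(auto grouper, arrow::compute::Grouper::Make(key_types, ctx));
      for (const auto& segment : segments) {
        ARROW_ASSIGN_OR_RAISE(arrow::Datum group_ids, grouper->Consume(segment));
        // ... aggregate using group_ids and grouper->num_groups() ...
        ARROW_RETURN_NOT_OK(grouper->Reset());  // drop accumulated group ids
      }
      return arrow::Status::OK();
    }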
 
   Result<Datum> Consume(const ExecSpan& batch, int64_t offset, int64_t length) override {
     ARROW_RETURN_NOT_OK(CheckAndCapLengthForConsume(batch.length, offset, &length));
@@ -838,8 +853,7 @@ struct GrouperFastImpl : public Grouper {
     return out;
   }
 
-  static constexpr int log_minibatch_max_ = 10;
-  static constexpr int minibatch_size_max_ = 1 << log_minibatch_max_;
+  static constexpr int minibatch_size_max_ = arrow::util::MiniBatch::kMiniBatchLength;
   static constexpr int minibatch_size_min_ = 128;
   int minibatch_size_;
diff --git a/cpp/src/arrow/compute/row/grouper.h b/cpp/src/arrow/compute/row/grouper.h
index 628a9c14f3e44..a883fb938ddaf 100644
--- a/cpp/src/arrow/compute/row/grouper.h
+++ b/cpp/src/arrow/compute/row/grouper.h
@@ -109,6 +109,10 @@ class ARROW_EXPORT Grouper {
   static Result<std::unique_ptr<Grouper>> Make(const std::vector<TypeHolder>& key_types,
                                                ExecContext* ctx = default_exec_context());
 
+  /// Reset all intermediate state, making the grouper logically equivalent to one that
+  /// was just `Make`d. The underlying buffers, if any, may or may not be released.
+  virtual Status Reset() = 0;
+
   /// Consume a batch of keys, producing the corresponding group ids as an integer array,
   /// over a slice defined by an offset and length, which defaults to the batch length.
   /// Currently only uint32 indices will be produced, eventually the bit width will only
diff --git a/cpp/src/arrow/compute/row/grouper_benchmark.cc b/cpp/src/arrow/compute/row/grouper_benchmark.cc
new file mode 100644
index 0000000000000..1e1a16d579009
--- /dev/null
+++ b/cpp/src/arrow/compute/row/grouper_benchmark.cc
@@ -0,0 +1,156 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <benchmark/benchmark.h>
+
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/string.h"
+
+#include "arrow/compute/row/grouper.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/util/benchmark_util.h"
+
+namespace arrow {
+namespace compute {
+
+constexpr auto kSeed = 0x0ff1ce;
+constexpr int64_t kRound = 16;
+constexpr double true_and_unique_probability = 0.2;
+
+static ExecBatch MakeRandomExecBatch(const DataTypeVector& types, int64_t num_rows,
+                                     double null_probability,
+                                     int64_t alignment = kDefaultBufferAlignment,
+                                     MemoryPool* memory_pool = nullptr) {
+  random::RandomArrayGenerator rng(kSeed);
+  auto num_types = static_cast<int>(types.size());
+
+  // clang-format off
+  // For the unique probability:
+  // The proportion of unique values determines the number of groups.
+  // 1. In most scenarios, unique values make up a small proportion of the keys.
+  // 2. GroupBy/HashJoin are sometimes used for deduplication, and in that
+  //    use case the keys are mostly unique.
+  auto metadata = key_value_metadata(
+      {
+          "null_probability",
+          "true_probability",  // for boolean type
+          "unique"             // for string type
+      },
+      {
+          internal::ToChars(null_probability),
+          internal::ToChars(true_and_unique_probability),
+          internal::ToChars(static_cast<int64_t>(num_rows *
+                                                 true_and_unique_probability))
+      });
+  // clang-format on
+
+  std::vector<Datum> values;
+  values.resize(num_types);
+  for (int i = 0; i < num_types; ++i) {
+    auto field = ::arrow::field("", types[i], metadata);
+    values[i] = rng.ArrayOf(*field, num_rows, alignment, memory_pool);
+  }
+
+  return ExecBatch(std::move(values), num_rows);
+}
+
+static void GrouperBenchmark(benchmark::State& state, const ExecSpan& span,
+                             ExecContext* ctx = nullptr) {
+  uint32_t num_groups = 0;
+  for (auto _ : state) {
+    ASSIGN_OR_ABORT(auto grouper, Grouper::Make(span.GetTypes(), ctx));
+    for (int i = 0; i < kRound; ++i) {
+      ASSIGN_OR_ABORT(auto group_ids, grouper->Consume(span));
+    }
+    num_groups = grouper->num_groups();
+  }
+
+  state.SetItemsProcessed(state.iterations() * kRound * span.length);
+  state.counters["num_groups"] = num_groups;
+  state.counters["uniqueness"] = static_cast<double>(num_groups) / (kRound * span.length);
+}
+
+static void GrouperWithMultiTypes(benchmark::State& state, const DataTypeVector& types) {
+  auto ctx = default_exec_context();
+
+  RegressionArgs args(state, false);
+  const int64_t num_rows = args.size;
+  const double null_proportion = args.null_proportion;
+
+  auto exec_batch = MakeRandomExecBatch(types, num_rows, null_proportion,
+                                        kDefaultBufferAlignment, ctx->memory_pool());
+  ExecSpan exec_span(exec_batch);
+  ASSIGN_OR_ABORT(auto grouper, Grouper::Make(exec_span.GetTypes(), ctx));
+  GrouperBenchmark(state, exec_span, ctx);
+}
+
+void SetArgs(benchmark::internal::Benchmark* bench) {
+  BenchmarkSetArgsWithSizes(bench, {1 << 10, 1 << 12});
+}
+
+// This benchmark mainly ensures that the construction of our underlying
+// RowTable and the performance of the comparison operations in the lower-level
+// compare_internal can be tracked (we had not systematically tested these
+// underlying operations before).
+//
+// It mainly covers:
+// 1. Basic types, including the impact of the null ratio on performance
+//    (comparison operations compare null values separately).
+//
+// 2. Combination types which will break the CPU pipeline in column comparison.
+//    Examples: https://github.com/apache/arrow/pull/41036#issuecomment-2048721547
+//
+// 3. Combination types requiring the columns to be resorted. These combinations
+//    essentially test the impact of RowTableEncoder's sorting of the input
+//    columns on the performance of CompareColumnsToRows.
+//    Examples: https://github.com/apache/arrow/pull/40998#issuecomment-2039204161
+
+// basic types
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{boolean}", {boolean()})->Apply(SetArgs);
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32}", {int32()})->Apply(SetArgs);
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int64}", {int64()})->Apply(SetArgs);
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{utf8}", {utf8()})->Apply(SetArgs);
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{fixed_size_binary(32)}",
                  {fixed_size_binary(32)})
+    ->Apply(SetArgs);
+
+// combination types
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{boolean, utf8}", {boolean(), utf8()})
+    ->Apply(SetArgs);
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32, int32}", {int32(), int32()})
+    ->Apply(SetArgs);
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int64, int32}", {int64(), int32()})
+    ->Apply(SetArgs);
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{boolean, int64, utf8}",
                  {boolean(), int64(), utf8()})
+    ->Apply(SetArgs);
+
+// combination types requiring the columns to be resorted
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32, boolean, utf8}",
                  {int32(), boolean(), utf8()})
+    ->Apply(SetArgs);
+BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32, int64, boolean, utf8}",
                  {int32(), int64(), boolean(), utf8()})
+    ->Apply(SetArgs);
+BENCHMARK_CAPTURE(GrouperWithMultiTypes,
                  "{utf8, int32, int64, fixed_size_binary(32), boolean}",
                  {utf8(), int32(), int64(), fixed_size_binary(32), boolean()})
+    ->Apply(SetArgs);
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/row/grouper_test.cc b/cpp/src/arrow/compute/row/grouper_test.cc
new file mode 100644
index 0000000000000..1e853be5e4af7
--- /dev/null
+++ b/cpp/src/arrow/compute/row/grouper_test.cc
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/compute/exec.h"
+#include "arrow/compute/row/grouper.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+
+namespace arrow {
+namespace compute {
+
+// Specialized case for GH-40997
+TEST(Grouper, ResortedColumnsWithLargeNullRows) {
+  const uint64_t num_rows = 1024;
+
+  // construct random arrays with plenty of null values
+  const int32_t kSeed = 42;
+  const int32_t min = 0;
+  const int32_t max = 100;
+  const double null_probability = 0.3;
+  const double true_probability = 0.5;
+  auto rng = random::RandomArrayGenerator(kSeed);
+  auto b_arr = rng.Boolean(num_rows, true_probability, null_probability);
+  auto i32_arr = rng.Int32(num_rows, min, max, null_probability);
+  auto i64_arr = rng.Int64(num_rows, min, max * 10, null_probability);
+
+  // construct batches whose columns will be resorted when the grouper is made
+  std::vector<ExecBatch> exec_batches = {ExecBatch({i64_arr, i32_arr, b_arr}, num_rows),
+                                         ExecBatch({i32_arr, i64_arr, b_arr}, num_rows),
+                                         ExecBatch({i64_arr, b_arr, i32_arr}, num_rows),
+                                         ExecBatch({i32_arr, b_arr, i64_arr}, num_rows),
+                                         ExecBatch({b_arr, i32_arr, i64_arr}, num_rows),
+                                         ExecBatch({b_arr, i64_arr, i32_arr}, num_rows)};
+
+  const int num_batches = static_cast<int>(exec_batches.size());
+  std::vector<uint32_t> group_num_vec;
+  group_num_vec.reserve(num_batches);
+
+  for (const auto& exec_batch : exec_batches) {
+    ExecSpan span(exec_batch);
+    ASSERT_OK_AND_ASSIGN(auto grouper, Grouper::Make(span.GetTypes()));
+    ASSERT_OK_AND_ASSIGN(Datum group_ids, grouper->Consume(span));
+    group_num_vec.emplace_back(grouper->num_groups());
+  }
+
+  for (int i = 1; i < num_batches; i++) {
+    ASSERT_EQ(group_num_vec[i - 1], group_num_vec[i]);
+  }
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/row/row_internal.cc b/cpp/src/arrow/compute/row/row_internal.cc
index f6a62c09fcf24..469205e9b008d 100644
--- a/cpp/src/arrow/compute/row/row_internal.cc
+++ b/cpp/src/arrow/compute/row/row_internal.cc
@@ -66,7 +66,8 @@ void RowTableMetadata::FromColumnMetadataVector(
   //
   // Columns are sorted based on the size in bytes of their fixed-length part.
   // For the varying-length column, the fixed-length part is the 32-bit field storing
-  // cumulative length of varying-length fields.
+  // cumulative length of varying-length fields. This is to make the memory access of
+  // each individual column within the encoded row alignment-friendly.
   //
   // The rules are:
   //
diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc
index b0c863b26a062..b90b3a64056bd 100644
--- a/cpp/src/arrow/compute/util.cc
+++ b/cpp/src/arrow/compute/util.cc
@@ -17,11 +17,7 @@
 
 #include "arrow/compute/util.h"
 
-#include "arrow/table.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
 #include "arrow/util/logging.h"
-#include "arrow/util/tracing_internal.h"
 #include "arrow/util/ubsan.h"
 
 namespace arrow {
@@ -31,33 +27,6 @@ using internal::CpuInfo;
 
 namespace util {
 
-void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {
-  int64_t new_top = top_ + EstimatedAllocationSize(num_bytes);
-  // Stack overflow check (see GH-39582).
-  // XXX cannot return a regular Status because most consumers do not either.
-  ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow";
-  *data = buffer_->mutable_data() + top_ + sizeof(uint64_t);
-  // We set 8 bytes before the beginning of the allocated range and
-  // 8 bytes after the end to check for stack overflow (which would
-  // result in those known bytes being corrupted).
-  reinterpret_cast<uint64_t*>(buffer_->mutable_data() + top_)[0] = kGuard1;
-  reinterpret_cast<uint64_t*>(buffer_->mutable_data() + new_top)[-1] = kGuard2;
-  *id = num_vectors_++;
-  top_ = new_top;
-}
-
-void TempVectorStack::release(int id, uint32_t num_bytes) {
-  ARROW_DCHECK(num_vectors_ == id + 1);
-  int64_t size = EstimatedAllocationSize(num_bytes);
-  ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[-1] ==
-               kGuard2);
-  ARROW_DCHECK(top_ >= size);
-  top_ -= size;
-  ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[0] ==
-               kGuard1);
-  --num_vectors_;
-}
-
 namespace bit_util {
 
 inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) {
diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h
index 88dce160ce936..d56e398667f66 100644
--- a/cpp/src/arrow/compute/util.h
+++ b/cpp/src/arrow/compute/util.h
@@ -24,17 +24,10 @@
 #include
 #include
 
-#include "arrow/buffer.h"
 #include "arrow/compute/expression.h"
 #include "arrow/compute/type_fwd.h"
-#include "arrow/memory_pool.h"
 #include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/bit_util.h"
 #include "arrow/util/cpu_info.h"
-#include "arrow/util/mutex.h"
-#include "arrow/util/thread_pool.h"
-#include "arrow/util/type_fwd.h"
 
 #if defined(__clang__) || defined(__GNUC__)
 #define BYTESWAP(x) __builtin_bswap64(x)
@@ -77,72 +70,6 @@ class MiniBatch {
   static constexpr int kMiniBatchLength = 1 << kLogMiniBatchLength;
 };
 
-/// Storage used to allocate temporary vectors of a batch size.
-/// Temporary vectors should resemble allocating temporary variables on the stack
-/// but in the context of vectorized processing where we need to store a vector of
-/// temporaries instead of a single value.
-class ARROW_EXPORT TempVectorStack {
-  template <typename>
-  friend class TempVectorHolder;
-
- public:
-  Status Init(MemoryPool* pool, int64_t size) {
-    num_vectors_ = 0;
-    top_ = 0;
-    buffer_size_ = EstimatedAllocationSize(size);
-    ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
-    // Ensure later operations don't accidentally read uninitialized memory.
-    std::memset(buffer->mutable_data(), 0xFF, size);
-    buffer_ = std::move(buffer);
-    return Status::OK();
-  }
-
- private:
-  static int64_t EstimatedAllocationSize(int64_t size) {
-    return PaddedAllocationSize(size) + 2 * sizeof(uint64_t);
-  }
-
-  static int64_t PaddedAllocationSize(int64_t num_bytes) {
-    // Round up allocation size to multiple of 8 bytes
-    // to avoid returning temp vectors with unaligned address.
-    //
-    // Also add padding at the end to facilitate loads and stores
-    // using SIMD when number of vector elements is not divisible
-    // by the number of SIMD lanes.
-    //
-    return ::arrow::bit_util::RoundUp(num_bytes, sizeof(int64_t)) + kPadding;
-  }
-  void alloc(uint32_t num_bytes, uint8_t** data, int* id);
-  void release(int id, uint32_t num_bytes);
-  static constexpr uint64_t kGuard1 = 0x3141592653589793ULL;
-  static constexpr uint64_t kGuard2 = 0x0577215664901532ULL;
-  static constexpr int64_t kPadding = 64;
-  int num_vectors_;
-  int64_t top_;
-  std::unique_ptr<Buffer> buffer_;
-  int64_t buffer_size_;
-};
-
-template <typename T>
-class TempVectorHolder {
-  friend class TempVectorStack;
-
- public:
-  ~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); }
-  T* mutable_data() { return reinterpret_cast<T*>(data_); }
-  TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) {
-    stack_ = stack;
-    num_elements_ = num_elements;
-    stack_->alloc(num_elements * sizeof(T), &data_, &id_);
-  }
-
- private:
-  TempVectorStack* stack_;
-  uint8_t* data_;
-  int id_;
-  uint32_t num_elements_;
-};
-
 namespace bit_util {
 
 ARROW_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
diff --git a/cpp/src/arrow/compute/util_internal.cc b/cpp/src/arrow/compute/util_internal.cc
new file mode 100644
index 0000000000000..7a7875162c434
--- /dev/null
+++ b/cpp/src/arrow/compute/util_internal.cc
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/util_internal.h"
+
+#include "arrow/compute/util.h"
+#include "arrow/memory_pool.h"
+
+#ifdef ADDRESS_SANITIZER
+#include <sanitizer/asan_interface.h>
+#endif
+
+namespace arrow {
+namespace util {
+
+TempVectorStack::~TempVectorStack() {
+#ifdef ADDRESS_SANITIZER
+  if (buffer_) {
+    ASAN_UNPOISON_MEMORY_REGION(buffer_->mutable_data(), buffer_size_);
+  }
+#endif
+}
+
+Status TempVectorStack::Init(MemoryPool* pool, int64_t size) {
+  num_vectors_ = 0;
+  top_ = 0;
+  buffer_size_ = EstimatedAllocationSize(size);
+  ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(buffer_size_, pool));
+#ifdef ADDRESS_SANITIZER
+  ASAN_POISON_MEMORY_REGION(buffer->mutable_data(), buffer_size_);
+#endif
+  buffer_ = std::move(buffer);
+  return Status::OK();
+}
+
+int64_t TempVectorStack::PaddedAllocationSize(int64_t num_bytes) {
+  // Round up allocation size to multiple of 8 bytes
+  // to avoid returning temp vectors with unaligned address.
+  //
+  // Also add padding at the end to facilitate loads and stores
+  // using SIMD when number of vector elements is not divisible
+  // by the number of SIMD lanes.
+  //
+  return ::arrow::bit_util::RoundUp(num_bytes, sizeof(int64_t)) + kPadding;
+}
+
+void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {
+  int64_t estimated_alloc_size = EstimatedAllocationSize(num_bytes);
+  int64_t new_top = top_ + estimated_alloc_size;
+  // Stack overflow check (see GH-39582).
+  // XXX cannot return a regular Status because most consumers do not either.
+  ARROW_CHECK_LE(new_top, buffer_size_)
+      << "TempVectorStack::alloc overflow: allocating " << estimated_alloc_size
+      << " on top of " << top_ << " in stack of size " << buffer_size_;
+#ifdef ADDRESS_SANITIZER
+  ASAN_UNPOISON_MEMORY_REGION(buffer_->mutable_data() + top_, estimated_alloc_size);
+#endif
+  *data = buffer_->mutable_data() + top_ + /*one guard*/ sizeof(uint64_t);
+#ifndef NDEBUG
+  // We set 8 bytes before the beginning of the allocated range and
+  // 8 bytes after the end to check for stack overflow (which would
+  // result in those known bytes being corrupted).
+  reinterpret_cast<uint64_t*>(buffer_->mutable_data() + top_)[0] = kGuard1;
+  reinterpret_cast<uint64_t*>(buffer_->mutable_data() + new_top)[-1] = kGuard2;
+#endif
+  *id = num_vectors_++;
+  top_ = new_top;
+}
+
+void TempVectorStack::release(int id, uint32_t num_bytes) {
+  ARROW_DCHECK(num_vectors_ == id + 1);
+  int64_t size = EstimatedAllocationSize(num_bytes);
+  ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[-1] ==
+               kGuard2);
+  ARROW_DCHECK(top_ >= size);
+  top_ -= size;
+  ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[0] ==
+               kGuard1);
+#ifdef ADDRESS_SANITIZER
+  ASAN_POISON_MEMORY_REGION(buffer_->mutable_data() + top_, size);
+#endif
+  --num_vectors_;
+}
+
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/util_internal.h b/cpp/src/arrow/compute/util_internal.h
index 87e89a3350721..5e5b15a5ff600 100644
--- a/cpp/src/arrow/compute/util_internal.h
+++ b/cpp/src/arrow/compute/util_internal.h
@@ -17,7 +17,10 @@
 
 #pragma once
 
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
 
 namespace arrow {
 namespace util {
@@ -27,5 +30,65 @@ void CheckAlignment(const void* ptr) {
   ARROW_DCHECK(reinterpret_cast<uint64_t>(ptr) % sizeof(T) == 0);
 }
 
+/// Storage used to allocate temporary vectors of a batch size.
+/// Temporary vectors should resemble allocating temporary variables on the stack
+/// but in the context of vectorized processing where we need to store a vector of
+/// temporaries instead of a single value.
+class ARROW_EXPORT TempVectorStack {
+  template <typename>
+  friend class TempVectorHolder;
+
+ public:
+  TempVectorStack() = default;
+  ~TempVectorStack();
+
+  ARROW_DISALLOW_COPY_AND_ASSIGN(TempVectorStack);
+
+  ARROW_DEFAULT_MOVE_AND_ASSIGN(TempVectorStack);
+
+  Status Init(MemoryPool* pool, int64_t size);
+
+  int64_t AllocatedSize() const { return top_; }
+
+ private:
+  static int64_t EstimatedAllocationSize(int64_t size) {
+    return PaddedAllocationSize(size) + /*two guards*/ 2 * sizeof(uint64_t);
+  }
+
+  static int64_t PaddedAllocationSize(int64_t num_bytes);
+
+  void alloc(uint32_t num_bytes, uint8_t** data, int* id);
+  void release(int id, uint32_t num_bytes);
+  static constexpr uint64_t kGuard1 = 0x3141592653589793ULL;
+  static constexpr uint64_t kGuard2 = 0x0577215664901532ULL;
+  static constexpr int64_t kPadding = 64;
+  int num_vectors_;
+  int64_t top_;
+  std::unique_ptr<Buffer> buffer_;
+  int64_t buffer_size_;
+
+  friend class TempVectorStackTest;
+};
+
+template <typename T>
+class TempVectorHolder {
+  friend class TempVectorStack;
+
+ public:
+  ~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); }
+  T* mutable_data() { return reinterpret_cast<T*>(data_); }
+  TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) {
+    stack_ = stack;
+    num_elements_ = num_elements;
+    stack_->alloc(num_elements * sizeof(T), &data_, &id_);
+  }
+
+ private:
+  TempVectorStack* stack_;
+  uint8_t* data_;
+  int id_;
+  uint32_t num_elements_;
+};
+
 }  // namespace util
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/util_internal_test.cc b/cpp/src/arrow/compute/util_internal_test.cc
new file mode 100644
index 0000000000000..fbf34f2228488
--- /dev/null
+++ b/cpp/src/arrow/compute/util_internal_test.cc
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/buffer.h"
+#include "arrow/compute/util_internal.h"
+#include "arrow/testing/gtest_util.h"
+
+namespace arrow {
+namespace util {
+
+class TempVectorStackTest : public ::testing::Test {
+ protected:
+  static const uint8_t* BufferData(const TempVectorStack& stack) {
+    return stack.buffer_->data();
+  }
+
+  static int64_t BufferCapacity(const TempVectorStack& stack) {
+    return stack.buffer_->capacity();
+  }
+};
+
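A short usage sketch of the internal API declared above (hypothetical caller; uint32_t is an arbitrary element type): TempVectorHolder pairs alloc()/release() RAII-style, so temp vectors come off the stack in strict LIFO order and the guard words are verified in debug builds.

    arrow::Status UseTempVector(arrow::util::TempVectorStack* stack,
                                uint32_t num_elements) {
      arrow::util::TempVectorHolder<uint32_t> ids(stack, num_elements);
      uint32_t* data = ids.mutable_data();  // 8-byte aligned, padded for SIMD
      for (uint32_t i = 0; i < num_elements; ++i) {
        data[i] = i;
      }
      return arrow::Status::OK();
    }  // ids' destructor releases its allocation here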
+// GH-41738: Test that the underlying buffer capacity is sufficient to hold the
+// requested vector.
+TEST_F(TempVectorStackTest, BufferCapacitySufficiency) {
+  for (uint32_t stack_size : {1, 7, 8, 63, 64, 65535, 65536}) {
+    ARROW_SCOPED_TRACE("stack_size = ", stack_size);
+    TempVectorStack stack;
+    ASSERT_OK(stack.Init(default_memory_pool(), stack_size));
+
+    TempVectorHolder<uint8_t> v(&stack, stack_size);
+    ASSERT_LE(v.mutable_data() + stack_size, BufferData(stack) + BufferCapacity(stack));
+  }
+}
+
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc
index 9e32e5437325f..a0e3a079b3157 100644
--- a/cpp/src/arrow/config.cc
+++ b/cpp/src/arrow/config.cc
@@ -20,6 +20,7 @@
 #include
 
 #include "arrow/util/config.h"
+#include "arrow/util/config_internal.h"
 #include "arrow/util/cpu_info.h"
 #include "arrow/vendored/datetime.h"
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index e981fafe8e780..e3033a81486e8 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -1053,8 +1053,8 @@ class AsyncThreadedTableReader
     auto self = shared_from_this();
     return ProcessFirstBuffer().Then([self](const std::shared_ptr<Buffer>& first_buffer) {
       auto block_generator = ThreadedBlockReader::MakeAsyncIterator(
-          self->buffer_generator_, MakeChunker(self->parse_options_),
-          std::move(first_buffer), self->read_options_.skip_rows_after_names);
+          self->buffer_generator_, MakeChunker(self->parse_options_), first_buffer,
+          self->read_options_.skip_rows_after_names);
       std::function<Status(CSVBlock)> block_visitor =
           [self](CSVBlock maybe_block) -> Status {
diff --git a/cpp/src/arrow/dataset/dataset.cc b/cpp/src/arrow/dataset/dataset.cc
index 0e7bc3da84235..8a3df182474bf 100644
--- a/cpp/src/arrow/dataset/dataset.cc
+++ b/cpp/src/arrow/dataset/dataset.cc
@@ -402,7 +402,7 @@ class BasicFragmentEvolution : public FragmentEvolutionStrategy {
             }
             return compute::field_ref(FieldRef(std::move(modified_indices)));
           }
-          return std::move(expr);
+          return expr;
         },
         [](compute::Expression expr, compute::Expression* old_expr) { return expr; });
   };
diff --git a/cpp/src/arrow/dataset/dataset_writer.cc b/cpp/src/arrow/dataset/dataset_writer.cc
index 754386275d60c..c60042dd6fef8 100644
--- a/cpp/src/arrow/dataset/dataset_writer.cc
+++ b/cpp/src/arrow/dataset/dataset_writer.cc
@@ -408,8 +408,7 @@ class DatasetWriterDirectoryQueue {
                                                  write_options, writer_state);
     dir_queue->PrepareDirectory();
     ARROW_ASSIGN_OR_RAISE(dir_queue->current_filename_, dir_queue->GetNextFilename());
-    // std::move required to make RTools 3.5 mingw compiler happy
-    return std::move(dir_queue);
+    return dir_queue;
   }
 
   Status Finish() {
diff --git a/cpp/src/arrow/dataset/discovery_test.cc b/cpp/src/arrow/dataset/discovery_test.cc
index 92cec7f324963..981146b7999ef 100644
--- a/cpp/src/arrow/dataset/discovery_test.cc
+++ b/cpp/src/arrow/dataset/discovery_test.cc
@@ -144,7 +144,8 @@ class FileSystemDatasetFactoryTest : public DatasetFactoryTest {
     }
     options_ = std::make_shared<ScanOptions>();
     options_->dataset_schema = schema;
-    ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::Default(*schema));
+    ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::Default(
+                                              *schema, options_->add_augmented_fields));
     SetProjection(options_.get(), std::move(projection));
     ASSERT_OK_AND_ASSIGN(dataset_, factory_->Finish(schema));
     ASSERT_OK_AND_ASSIGN(auto fragment_it, dataset_->GetFragments());
diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index 09ab775727c98..6258a674deee5 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -106,7 +106,7 @@ class CsvFileScanner : public FragmentScanner {
     }
     convert_options.include_columns = std::move(columns);
     convert_options.column_types = std::move(column_types);
-    return std::move(convert_options);
+    return convert_options;
   }
 
   static Future<std::shared_ptr<FragmentScanner>> Make(
diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index c17ba89be7907..1f8b6cc4882cf 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -279,7 +279,7 @@ Status ResolveOneFieldRef(
 // names) based on the dataset schema. Returns `false` if no conversion was needed.
 Result<FieldRef> MaybeConvertFieldRef(FieldRef ref, const Schema& dataset_schema) {
   if (ARROW_PREDICT_TRUE(ref.IsNameSequence())) {
-    return std::move(ref);
+    return ref;
   }
 
   ARROW_ASSIGN_OR_RAISE(auto path, ref.FindOne(dataset_schema));
@@ -504,7 +504,8 @@ Result<std::shared_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
   std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
   RETURN_NOT_OK(parquet::arrow::FileReader::Make(
       options->pool, std::move(reader), std::move(arrow_properties), &arrow_reader));
-  return std::move(arrow_reader);
+  // R build with openSUSE155 requires an explicit shared_ptr construction
+  return std::shared_ptr<parquet::arrow::FileReader>(std::move(arrow_reader));
 }
 
 Future<std::shared_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReaderAsync(
@@ -543,7 +544,9 @@ Future<std::shared_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
                                reader)),
             std::move(arrow_properties), &arrow_reader));
-        return std::move(arrow_reader);
+        // R build with openSUSE155 requires an explicit shared_ptr construction
+        return std::shared_ptr<parquet::arrow::FileReader>(
+            std::move(arrow_reader));
       },
       [path = source.path()](const Status& status)
           -> Result<std::shared_ptr<parquet::arrow::FileReader>> {
diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
index 307017fd67e06..0287d593d12d3 100644
--- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
@@ -148,17 +148,22 @@ class DatasetEncryptionTestBase : public ::testing::Test {
                          FileSystemDatasetFactory::Make(file_system_, selector,
                                                         file_format, factory_options));
 
-    // Read dataset into table
+    // Create the dataset
     ASSERT_OK_AND_ASSIGN(auto dataset, dataset_factory->Finish());
-    ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan());
-    ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());
-    ASSERT_OK_AND_ASSIGN(auto read_table, scanner->ToTable());
-
-    // Verify the data was read correctly
-    ASSERT_OK_AND_ASSIGN(auto combined_table, read_table->CombineChunks());
-    // Validate the table
-    ASSERT_OK(combined_table->ValidateFull());
-    AssertTablesEqual(*combined_table, *table_);
+
+    // Reuse the dataset above to scan it twice to make sure decryption works correctly.
+    for (size_t i = 0; i < 2; ++i) {
+      // Read dataset into table
+      ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan());
+      ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());
+      ASSERT_OK_AND_ASSIGN(auto read_table, scanner->ToTable());
+
+      // Verify the data was read correctly
+      ASSERT_OK_AND_ASSIGN(auto combined_table, read_table->CombineChunks());
+      // Validate the table
+      ASSERT_OK(combined_table->ValidateFull());
+      AssertTablesEqual(*combined_table, *table_);
+    }
   }
 
 protected:
diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index 76cd0af3b835f..bf626826d4d1b 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -330,8 +330,9 @@ TEST_F(TestParquetFileFormat, CachedMetadata) {
   // Read the file the first time, will read metadata
   auto options = std::make_shared<ScanOptions>();
   options->filter = literal(true);
-  ASSERT_OK_AND_ASSIGN(auto projection_descr,
-                       ProjectionDescr::FromNames({"x"}, *test_schema));
+  ASSERT_OK_AND_ASSIGN(
+      auto projection_descr,
+      ProjectionDescr::FromNames({"x"}, *test_schema, options->add_augmented_fields));
   options->projected_schema = projection_descr.schema;
   options->projection = projection_descr.expression;
   ASSERT_OK_AND_ASSIGN(auto generator, fragment->ScanBatchesAsync(options));
diff --git a/cpp/src/arrow/dataset/scan_node.cc b/cpp/src/arrow/dataset/scan_node.cc
index c25c5b70ae1ec..4493332111429 100644
--- a/cpp/src/arrow/dataset/scan_node.cc
+++ b/cpp/src/arrow/dataset/scan_node.cc
@@ -166,7 +166,7 @@ class ScanNode : public acero::ExecNode, public acero::TracedNode {
     return Status::Invalid("A scan filter must be a boolean expression");
   }
 
-  return std::move(normalized);
+  return normalized;
 }
 
 static Result<acero::ExecNode*> Make(acero::ExecPlan* plan,
@@ -334,7 +334,7 @@ class ScanNode : public acero::ExecNode, public acero::TracedNode {
         extracted.known_values.push_back({i, *maybe_casted});
       }
     }
-    return std::move(extracted);
+    return extracted;
   }
 
   Future<> BeginScan(const std::shared_ptr<InspectedFragment>& inspected_fragment) {
@@ -427,7 +427,7 @@ class ScanNode : public acero::ExecNode, public acero::TracedNode {
         /*queue=*/nullptr,
         [this]() { return output_->InputFinished(this, num_batches_.load()); });
     fragment_tasks->AddAsyncGenerator<std::shared_ptr<Fragment>>(
-        std::move(frag_gen),
+        frag_gen,
        [this, fragment_tasks = std::move(fragment_tasks)](const std::shared_ptr<Fragment>& fragment) {
          fragment_tasks->AddTask(std::make_unique(this, fragment));
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 18981d1451980..a856a792a264f 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -211,7 +211,8 @@ Status NormalizeScanOptions(const std::shared_ptr<ScanOptions>& scan_options,
       // create the projected schema only if the provided expressions
       // produce a valid set of fields.
       ARROW_ASSIGN_OR_RAISE(auto projection_descr,
-                            ProjectionDescr::Default(*projected_schema));
+                            ProjectionDescr::Default(
+                                *projected_schema, scan_options->add_augmented_fields));
       scan_options->projected_schema = std::move(projection_descr.schema);
       scan_options->projection = projection_descr.expression;
       ARROW_ASSIGN_OR_RAISE(scan_options->projection,
@@ -220,7 +221,8 @@ Status NormalizeScanOptions(const std::shared_ptr<ScanOptions>& scan_options,
       // if projected_fields are not found, we default to creating the projected_schema
       // and projection from the dataset_schema.
       ARROW_ASSIGN_OR_RAISE(auto projection_descr,
-                            ProjectionDescr::Default(*dataset_schema));
+                            ProjectionDescr::Default(
+                                *dataset_schema, scan_options->add_augmented_fields));
       scan_options->projected_schema = std::move(projection_descr.schema);
       scan_options->projection = projection_descr.expression;
     }
@@ -231,7 +233,7 @@ Status NormalizeScanOptions(const std::shared_ptr<ScanOptions>& scan_options,
     ARROW_ASSIGN_OR_RAISE(
         auto projection_descr,
         ProjectionDescr::FromNames(scan_options->projected_schema->field_names(),
-                                   *dataset_schema));
+                                   *dataset_schema, scan_options->add_augmented_fields));
     scan_options->projection = projection_descr.expression;
   }
 
@@ -730,7 +732,8 @@ Future<int64_t> AsyncScanner::CountRowsAsync(Executor* executor) {
   const auto options = std::make_shared<ScanOptions>(*scan_options_);
   ARROW_ASSIGN_OR_RAISE(auto empty_projection,
                         ProjectionDescr::FromNames(std::vector<std::string>(),
-                                                   *scan_options_->dataset_schema));
+                                                   *scan_options_->dataset_schema,
+                                                   scan_options_->add_augmented_fields));
   SetProjection(options.get(), empty_projection);
 
   auto total = std::make_shared<std::atomic<int64_t>>(0);
@@ -828,7 +831,8 @@ Result<ProjectionDescr> ProjectionDescr::FromExpressions(
 }
 
 Result<ProjectionDescr> ProjectionDescr::FromNames(std::vector<std::string> names,
-                                                   const Schema& dataset_schema) {
+                                                   const Schema& dataset_schema,
+                                                   bool add_augmented_fields) {
   std::vector<compute::Expression> exprs(names.size());
   for (size_t i = 0; i < exprs.size(); ++i) {
     // If name isn't in schema, try finding it by dotted path.
@@ -846,15 +850,19 @@ Result<ProjectionDescr> ProjectionDescr::FromNames(std::vector<std::string> names,
     }
   }
   auto fields = dataset_schema.fields();
-  for (const auto& aug_field : kAugmentedFields) {
-    fields.push_back(aug_field);
+  if (add_augmented_fields) {
+    for (const auto& aug_field : kAugmentedFields) {
+      fields.push_back(aug_field);
+    }
   }
   return ProjectionDescr::FromExpressions(std::move(exprs), std::move(names),
                                           Schema(fields, dataset_schema.metadata()));
 }
 
-Result<ProjectionDescr> ProjectionDescr::Default(const Schema& dataset_schema) {
-  return ProjectionDescr::FromNames(dataset_schema.field_names(), dataset_schema);
+Result<ProjectionDescr> ProjectionDescr::Default(const Schema& dataset_schema,
+                                                 bool add_augmented_fields) {
+  return ProjectionDescr::FromNames(dataset_schema.field_names(), dataset_schema,
+                                    add_augmented_fields);
 }
 
 void SetProjection(ScanOptions* options, ProjectionDescr projection) {
@@ -899,7 +907,8 @@ const std::shared_ptr<Schema>& ScannerBuilder::projected_schema() const {
 Status ScannerBuilder::Project(std::vector<std::string> columns) {
   ARROW_ASSIGN_OR_RAISE(
       auto projection,
-      ProjectionDescr::FromNames(std::move(columns), *scan_options_->dataset_schema));
+      ProjectionDescr::FromNames(std::move(columns), *scan_options_->dataset_schema,
+                                 scan_options_->add_augmented_fields));
   SetProjection(scan_options_.get(), std::move(projection));
   return Status::OK();
 }
@@ -1052,8 +1061,10 @@ Result<acero::ExecNode*> MakeScanNode(acero::ExecPlan* plan,
       });
 
   auto fields = scan_options->dataset_schema->fields();
-  for (const auto& aug_field : kAugmentedFields) {
-    fields.push_back(aug_field);
+  if (scan_options->add_augmented_fields) {
+    for (const auto& aug_field : kAugmentedFields) {
+      fields.push_back(aug_field);
+    }
   }
 
   return acero::MakeExecNode(
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 4479158ff20cc..d2de267897180 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -114,6 +114,9 @@ struct ARROW_DS_EXPORT ScanOptions {
   /// Note: This must be true in order for any readahead to happen
   bool use_threads = false;
 
+  /// If true, the scanner will add augmented fields to the output schema.
+  bool add_augmented_fields = true;
+
   /// Fragment-specific scan options.
   std::shared_ptr<FragmentScanOptions> fragment_scan_options;
 
@@ -287,10 +290,12 @@ struct ARROW_DS_EXPORT ProjectionDescr {
 
   /// \brief Create a default projection referencing fields in the dataset schema
   static Result<ProjectionDescr> FromNames(std::vector<std::string> names,
-                                           const Schema& dataset_schema);
+                                           const Schema& dataset_schema,
+                                           bool add_augmented_fields = true);
 
   /// \brief Make a projection that projects every field in the dataset schema
-  static Result<ProjectionDescr> Default(const Schema& dataset_schema);
+  static Result<ProjectionDescr> Default(const Schema& dataset_schema,
+                                         bool add_augmented_fields = true);
 };
 
 /// \brief Utility method to set the projection expression and schema
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index fccfc80032d31..58bc9c8c0ea6b 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -1103,7 +1103,8 @@ TEST_P(TestScanner, ProjectionDefaults) {
   }
   // If we only specify a projection expression then infer the projected schema
   // from the projection expression
-  auto projection_desc = ProjectionDescr::FromNames({"i32"}, *schema_);
+  auto projection_desc =
+      ProjectionDescr::FromNames({"i32"}, *schema_, /*add_augmented_fields=*/true);
   {
     ARROW_SCOPED_TRACE("User only specifies projection");
     options_->projection = projection_desc->expression;
@@ -1148,7 +1149,8 @@ TEST_P(TestScanner, ProjectedScanNestedFromNames) {
   });
   ASSERT_OK_AND_ASSIGN(auto descr,
                        ProjectionDescr::FromNames({".struct.i32", "nested.right.f64"},
-                                                  *options_->dataset_schema))
+                                                  *options_->dataset_schema,
+                                                  options_->add_augmented_fields))
   SetProjection(options_.get(), std::move(descr));
   auto batch_in = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
   auto batch_out = ConstantArrayGenerator::Zeroes(
@@ -2106,7 +2108,8 @@ TEST(ScanOptions, TestMaterializedFields) {
   auto set_projection_from_names = [&opts](std::vector<std::string> names) {
     ASSERT_OK_AND_ASSIGN(auto projection,
                          ProjectionDescr::FromNames(
-                             std::move(names), *opts->dataset_schema));
+                             std::move(names), *opts->dataset_schema,
+                             opts->add_augmented_fields));
     SetProjection(opts.get(), std::move(projection));
   };
 
@@ -2160,7 +2163,8 @@ TEST(ScanOptions, TestMaterializedFields) {
   // project top-level field, filter nothing
   opts->filter = literal(true);
   ASSERT_OK_AND_ASSIGN(projection,
-                       ProjectionDescr::FromNames({"nested"}, *opts->dataset_schema));
+                       ProjectionDescr::FromNames({"nested"}, *opts->dataset_schema,
+                                                  opts->add_augmented_fields));
   SetProjection(opts.get(), std::move(projection));
 
   EXPECT_THAT(opts->MaterializedFields(), ElementsAre(FieldRef("nested")));
diff --git a/cpp/src/arrow/dataset/test_util_internal.h b/cpp/src/arrow/dataset/test_util_internal.h
index de0519afac9e1..8195218b0cfe8 100644
--- a/cpp/src/arrow/dataset/test_util_internal.h
+++ b/cpp/src/arrow/dataset/test_util_internal.h
@@ -386,7 +386,8 @@ class DatasetFixtureMixin : public ::testing::Test {
     options_ = std::make_shared<ScanOptions>();
     options_->dataset_schema = schema_;
     ASSERT_OK_AND_ASSIGN(auto projection,
-                         ProjectionDescr::FromNames(schema_->field_names(), *schema_));
+                         ProjectionDescr::FromNames(schema_->field_names(), *schema_,
+                                                    options_->add_augmented_fields));
     SetProjection(options_.get(), std::move(projection));
     SetFilter(literal(true));
   }
@@ -398,7 +399,8 @@ class DatasetFixtureMixin : public ::testing::Test {
   void SetProjectedColumns(std::vector<std::string> column_names) {
     ASSERT_OK_AND_ASSIGN(
         auto projection,
-        ProjectionDescr::FromNames(std::move(column_names), *options_->dataset_schema));
+        ProjectionDescr::FromNames(std::move(column_names), *options_->dataset_schema,
+                                   /*add_augmented_fields=*/true));
     SetProjection(options_.get(), std::move(projection));
   }
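A sketch of how a consumer might use the new option end to end (hypothetical caller code; only the APIs added in this diff are assumed). Setting add_augmented_fields to false keeps the synthetic columns ("__fragment_index", "__batch_index", "__last_in_fragment", "__filename") out of the projected schema, which is what the Substrait consumer below relies on:

    arrow::Status ConfigureScan(const std::shared_ptr<arrow::Schema>& dataset_schema,
                                std::shared_ptr<arrow::dataset::ScanOptions>* out) {
      auto scan_options = std::make_shared<arrow::dataset::ScanOptions>();
      scan_options->add_augmented_fields = false;  // plain data columns only
      ARROW_ASSIGN_OR_RAISE(
          auto projection,
          arrow::dataset::ProjectionDescr::FromNames(
              dataset_schema->field_names(), *dataset_schema,
              scan_options->add_augmented_fields));
      arrow::dataset::SetProjection(scan_options.get(), std::move(projection));
      *out = std::move(scan_options);
      return arrow::Status::OK();
    }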
@@ -502,7 +504,8 @@ class FileFormatFixtureMixin : public ::testing::Test {
   void SetSchema(std::vector<std::shared_ptr<Field>> fields) {
     opts_->dataset_schema = schema(std::move(fields));
     ASSERT_OK_AND_ASSIGN(auto projection,
-                         ProjectionDescr::Default(*opts_->dataset_schema));
+                         ProjectionDescr::Default(*opts_->dataset_schema,
+                                                  /*add_augmented_fields=*/true));
     SetProjection(opts_.get(), std::move(projection));
   }
 
@@ -512,7 +515,8 @@ class FileFormatFixtureMixin : public ::testing::Test {
   void Project(std::vector<std::string> names) {
     ASSERT_OK_AND_ASSIGN(auto projection,
                          ProjectionDescr::FromNames(
-                             std::move(names), *opts_->dataset_schema));
+                             std::move(names), *opts_->dataset_schema,
+                             /*add_augmented_fields=*/true));
     SetProjection(opts_.get(), std::move(projection));
   }
 
@@ -993,7 +997,8 @@ class FileFormatScanMixin : public FileFormatFixtureMixin,
     auto i64 = field("i64", int64());
     this->opts_->dataset_schema = schema({i32, i32, i64});
     ASSERT_RAISES(Invalid,
-                  ProjectionDescr::FromNames({"i32"}, *this->opts_->dataset_schema));
+                  ProjectionDescr::FromNames({"i32"}, *this->opts_->dataset_schema,
+                                             /*add_augmented_fields=*/true));
   }
   void TestScanWithPushdownNulls() {
     // Regression test for ARROW-15312
@@ -1933,7 +1938,8 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin {
     scan_options_->dataset_schema = dataset_->schema();
     ASSERT_OK_AND_ASSIGN(
         auto projection,
-        ProjectionDescr::FromNames(source_schema_->field_names(), *dataset_->schema()));
+        ProjectionDescr::FromNames(source_schema_->field_names(), *dataset_->schema(),
+                                   scan_options_->add_augmented_fields));
     SetProjection(scan_options_.get(), std::move(projection));
   }
 
diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc
index 98b8f7b30397e..c2195e89e75ee 100644
--- a/cpp/src/arrow/device.cc
+++ b/cpp/src/arrow/device.cc
@@ -116,6 +116,32 @@ Result<std::shared_ptr<Buffer>> MemoryManager::ViewBuffer(
                                  " on ", to->device()->ToString(), " not supported");
 }
 
+Status MemoryManager::CopyBufferSliceToCPU(const std::shared_ptr<Buffer>& buf,
+                                           int64_t offset, int64_t length,
+                                           uint8_t* out_data) {
+  if (ARROW_PREDICT_TRUE(buf->is_cpu())) {
+    memcpy(out_data, buf->data() + offset, static_cast<size_t>(length));
+    return Status::OK();
+  }
+
+  auto& from = buf->memory_manager();
+  auto cpu_mm = default_cpu_memory_manager();
+  // Try a view first
+  auto maybe_buffer_result = from->ViewBufferTo(buf, cpu_mm);
+  if (!COPY_BUFFER_SUCCESS(maybe_buffer_result)) {
+    // View failed, try a copy instead
+    maybe_buffer_result = from->CopyBufferTo(buf, cpu_mm);
+  }
+  ARROW_ASSIGN_OR_RAISE(auto maybe_buffer, std::move(maybe_buffer_result));
+  if (maybe_buffer != nullptr) {
+    memcpy(out_data, maybe_buffer->data() + offset, static_cast<size_t>(length));
+    return Status::OK();
+  }
+
+  return Status::NotImplemented("Copying buffer slice from ", from->device()->ToString(),
+                                " to CPU not supported");
+}
+
 #undef COPY_BUFFER_RETURN
 #undef COPY_BUFFER_SUCCESS
 
@@ -189,7 +215,7 @@ Result<std::shared_ptr<Buffer>> CPUMemoryManager::CopyNonOwnedFrom(
   if (buf.size() > 0) {
     memcpy(dest->mutable_data(), buf.data(), static_cast<size_t>(buf.size()));
   }
-  return std::move(dest);
+  return dest;
 }
 
 Result<std::shared_ptr<Buffer>> CPUMemoryManager::ViewBufferFrom(
@@ -221,7 +247,7 @@ Result<std::shared_ptr<Buffer>> CPUMemoryManager::CopyNonOwnedTo(
   if (buf.size() > 0) {
     memcpy(dest->mutable_data(), buf.data(), static_cast<size_t>(buf.size()));
   }
-  return std::move(dest);
+  return dest;
 }
 
 Result<std::shared_ptr<Buffer>> CPUMemoryManager::ViewBufferTo(
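Usage sketch for the new helper (hypothetical caller): copy a small slice out of a buffer that may live on a non-CPU device, without caring whether the device supports zero-copy views or only copies.

    arrow::Status ReadPrefix(const std::shared_ptr<arrow::Buffer>& buf) {
      uint8_t bytes[16];
      // The caller is responsible for ensuring offset + length <= buf->size().
      return arrow::MemoryManager::CopyBufferSliceToCPU(buf, /*offset=*/0,
                                                        /*length=*/sizeof(bytes), bytes);
    }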
diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h
index 3003bad7c459c..f5cca0d27d7b2 100644
--- a/cpp/src/arrow/device.h
+++ b/cpp/src/arrow/device.h
@@ -140,7 +140,7 @@ class ARROW_EXPORT Device : public std::enable_shared_from_this<Device>,
   /// derived from Device::Stream to allow for stream ordered events
   /// and memory allocations.
   virtual Result<std::shared_ptr<Stream>> MakeStream(
-      [[maybe_unused]] unsigned int flags) {
+      unsigned int ARROW_ARG_UNUSED(flags)) {
     return NULLPTR;
   }
 
@@ -151,8 +151,8 @@ class ARROW_EXPORT Device : public std::enable_shared_from_this<Device>,
   /// a no-op function can be passed to indicate ownership is maintained
   /// externally
   virtual Result<std::shared_ptr<Stream>> WrapStream(
-      [[maybe_unused]] void* device_stream,
-      [[maybe_unused]] Stream::release_fn_t release_fn) {
+      void* ARROW_ARG_UNUSED(device_stream),
+      Stream::release_fn_t ARROW_ARG_UNUSED(release_fn)) {
     return NULLPTR;
   }
 
@@ -249,6 +249,10 @@ class ARROW_EXPORT MemoryManager : public std::enable_shared_from_this<MemoryManager>
   static Result<std::shared_ptr<Buffer>> ViewBuffer(
       const std::shared_ptr<Buffer>& source, const std::shared_ptr<MemoryManager>& to);
 
+  /// \brief Copy a slice of a buffer into a CPU pointer
+  static Status CopyBufferSliceToCPU(const std::shared_ptr<Buffer>& buf, int64_t offset,
+                                     int64_t length, uint8_t* out_data);
+
   /// \brief Create a new SyncEvent.
   ///
   /// This version should construct the appropriate event for the device and
diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc
index 480cf30d3033f..56d7956076bf8 100644
--- a/cpp/src/arrow/engine/substrait/expression_internal.cc
+++ b/cpp/src/arrow/engine/substrait/expression_internal.cc
@@ -133,7 +133,7 @@ Result<SubstraitCall> DecodeScalarFunction(
   for (const auto& opt : scalar_fn.options()) {
     ARROW_RETURN_NOT_OK(DecodeOption(opt, &call));
   }
-  return std::move(call);
+  return call;
 }
 
 std::string EnumToString(int value, const google::protobuf::EnumDescriptor* descriptor) {
@@ -279,7 +279,7 @@ Result<SubstraitCall> FromProto(const substrait::AggregateFunction& func, bool i
   for (int i = 0; i < func.options_size(); i++) {
     ARROW_RETURN_NOT_OK(DecodeOption(func.options(i), &call));
   }
-  return std::move(call);
+  return call;
 }
 
 Result<compute::Expression> FromProto(const substrait::Expression& expr,
@@ -1153,7 +1153,7 @@ Result<std::unique_ptr<substrait::Expression::Literal>> ToProto(
     out->set_allocated_null(type.release());
   }
 
-  return std::move(out);
+  return out;
 }
 
 static Status AddChildToReferenceSegment(
@@ -1226,7 +1226,7 @@ static Result<std::unique_ptr<substrait::Expression>> MakeDirectReference(
   auto out = std::make_unique<substrait::Expression>();
   out->set_allocated_selection(selection.release());
 
-  return std::move(out);
+  return out;
 }
 
 // Indexes the given Substrait struct-typed expression or root (if expr is empty) using
@@ -1292,7 +1292,7 @@ Result<std::unique_ptr<substrait::Expression::ScalarFunction>> EncodeSubstraitCa
     }
   }
 
-  return std::move(scalar_fn);
+  return scalar_fn;
 }
 
 Result<std::vector<std::unique_ptr<substrait::Expression::Literal>>> DatumToLiterals(
@@ -1356,7 +1356,7 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(
   if (auto datum = expr.literal()) {
     ARROW_ASSIGN_OR_RAISE(auto literal, ToProto(*datum, ext_set, conversion_options));
     out->set_allocated_literal(literal.release());
-    return std::move(out);
+    return out;
   }
 
   if (auto param = expr.parameter()) {
@@ -1367,7 +1367,7 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(
       ARROW_ASSIGN_OR_RAISE(out, MakeStructFieldReference(std::move(out), index));
     }
 
-    return std::move(out);
+    return out;
   }
 
   auto call = CallNotNull(expr);
@@ -1399,7 +1399,7 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(
     if_then_->set_allocated_else_(arguments.back().release());
     out->set_allocated_if_then(if_then_.release());
 
-    return std::move(out);
+    return out;
   }
 }
@@ -1423,7 +1423,7 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(
       for (int index : field_path.indices()) {
        ARROW_ASSIGN_OR_RAISE(out, MakeStructFieldReference(std::move(out), index));
      }
-      return std::move(out);
+      return out;
    }
 
    if (call->function_name == "list_element") {
@@ -1449,7 +1449,7 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(
      if_then->set_allocated_else_(arguments[2].release());
      out->set_allocated_if_then(if_then.release());
 
-      return std::move(out);
+      return out;
    } else if (call->function_name == "cast") {
      auto cast = std::make_unique<substrait::Expression::Cast>();
@@ -1478,7 +1478,7 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(
      cast->set_allocated_type(to_type.release());
      out->set_allocated_cast(cast.release());
 
-      return std::move(out);
+      return out;
    } else if (call->function_name == "is_in") {
      auto or_list = std::make_unique<substrait::Expression::SingularOrList>();
 
@@ -1499,7 +1499,7 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(
        or_list->mutable_options()->AddAllocated(option.release());
      }
      out->set_allocated_singular_or_list(or_list.release());
-      return std::move(out);
+      return out;
    }
 
    // other expression types dive into extensions immediately
@@ -1534,7 +1534,7 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(
    return maybe_converter.status();
  }
  out->set_allocated_scalar_function(scalar_fn.release());
-  return std::move(out);
+  return out;
 }
 
 }  // namespace engine
diff --git a/cpp/src/arrow/engine/substrait/extended_expression_internal.cc b/cpp/src/arrow/engine/substrait/extended_expression_internal.cc
index 225901c910f25..e2e6d934372dc 100644
--- a/cpp/src/arrow/engine/substrait/extended_expression_internal.cc
+++ b/cpp/src/arrow/engine/substrait/extended_expression_internal.cc
@@ -143,7 +143,7 @@ Result<std::unique_ptr<substrait::ExpressionReference>> CreateExpressionReferenc
   ARROW_ASSIGN_OR_RAISE(std::unique_ptr<substrait::Expression> expression,
                         ToProto(expr, ext_set, conversion_options));
   expr_ref->set_allocated_expression(expression.release());
-  return std::move(expr_ref);
+  return expr_ref;
 }
 
 }  // namespace
@@ -178,7 +178,7 @@ Result<BoundExpressions> FromProto(const substrait::ExtendedExpression& expressi
     *ext_set_out = std::move(ext_set);
   }
 
-  return std::move(bound_expressions);
+  return bound_expressions;
 }
 
 Result<std::unique_ptr<substrait::ExtendedExpression>> ToProto(
@@ -203,7 +203,7 @@ Result<std::unique_ptr<substrait::ExtendedExpression>> ToProto(
     expression->mutable_referred_expr()->AddAllocated(expr_ref.release());
   }
   RETURN_NOT_OK(AddExtensionSetToExtendedExpression(*ext_set, expression.get()));
-  return std::move(expression);
+  return expression;
 }
 
 }  // namespace engine
diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc
index e955084dcdfbb..cefe53d2847ca 100644
--- a/cpp/src/arrow/engine/substrait/extension_set.cc
+++ b/cpp/src/arrow/engine/substrait/extension_set.cc
@@ -288,7 +288,7 @@ Result<ExtensionSet> ExtensionSet::Make(
     }
   }
 
-  return std::move(set);
+  return set;
 }
 
 Result<ExtensionSet::TypeRecord> ExtensionSet::DecodeType(uint32_t anchor) const {
@@ -799,7 +799,7 @@ Result<std::vector<compute::Expression>> GetValueArgs(const SubstraitCall& call,
     ARROW_ASSIGN_OR_RAISE(compute::Expression arg, call.GetValueArg(index));
     expressions.push_back(arg);
   }
-  return std::move(expressions);
+  return expressions;
 }
 
 ExtensionIdRegistry::SubstraitCallToArrow DecodeOptionlessOverflowableArithmetic(
@@ -874,7 +874,7 @@ ExtensionIdRegistry::ArrowToSubstraitCall EncodeOptionlessOverflowableArithmetic
     for (std::size_t i = 0; i < call.arguments.size(); i++) {
       substrait_call.SetValueArg(static_cast<uint32_t>(i), call.arguments[i]);
     }
-    return std::move(substrait_call);
+    return substrait_call;
   };
 }
 
@@ -887,7 +887,7 @@ ExtensionIdRegistry::ArrowToSubstraitCall EncodeBasic(Id substrait_fn_id) {
     for (std::size_t i = 0; i < call.arguments.size(); i++) {
       substrait_call.SetValueArg(static_cast<uint32_t>(i), call.arguments[i]);
     }
-    return std::move(substrait_call);
+    return substrait_call;
   };
 }
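The recurring `return std::move(x);` → `return x;` rewrite across these files is a cleanup, not a behavior change. A standalone illustration of the rule (standard C++ semantics, not code from this PR): returning a named local by value already permits NRVO, and the return expression is treated as an rvalue even when it converts to a wrapper type such as Result<T>, so the local is still moved; an explicit std::move defeats copy elision and triggers -Wredundant-move/-Wpessimizing-move warnings on recent GCC/Clang. (The parquet reader change above is the one exception, where an explicit shared_ptr construction is kept for an older toolchain.)

    #include <vector>
    #include "arrow/result.h"

    arrow::Result<std::vector<int>> MakeVector() {
      std::vector<int> out = {1, 2, 3};
      return out;                // NRVO or implicit move into Result
      // return std::move(out);  // pessimizing: blocks copy elision, warns
    }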
@@ -907,7 +907,7 @@ ExtensionIdRegistry::ArrowToSubstraitCall EncodeIsNull(Id substrait_fn_id) {
     for (std::size_t i = 0; i < call.arguments.size(); i++) {
       substrait_call.SetValueArg(static_cast<uint32_t>(i), call.arguments[i]);
     }
-    return std::move(substrait_call);
+    return substrait_call;
   };
 }
 
diff --git a/cpp/src/arrow/engine/substrait/plan_internal.cc b/cpp/src/arrow/engine/substrait/plan_internal.cc
index cc4806878c404..4473b0443eb19 100644
--- a/cpp/src/arrow/engine/substrait/plan_internal.cc
+++ b/cpp/src/arrow/engine/substrait/plan_internal.cc
@@ -65,7 +65,7 @@ Result<std::unique_ptr<substrait::Plan>> PlanToProto(
   plan_rel->set_allocated_root(rel_root.release());
   subs_plan->mutable_relations()->AddAllocated(plan_rel.release());
   RETURN_NOT_OK(AddExtensionSetToPlan(*ext_set, subs_plan.get()));
-  return std::move(subs_plan);
+  return subs_plan;
 }
 
 }  // namespace engine
diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc b/cpp/src/arrow/engine/substrait/relation_internal.cc
index f15f1a5527b7b..6a25bd89f0128 100644
--- a/cpp/src/arrow/engine/substrait/relation_internal.cc
+++ b/cpp/src/arrow/engine/substrait/relation_internal.cc
@@ -91,7 +91,7 @@ Result<EmitInfo> GetEmitInfo(const RelMessage& rel,
   }
   emit_info.expressions = std::move(proj_field_refs);
   emit_info.schema = schema(std::move(emit_fields));
-  return std::move(emit_info);
+  return emit_info;
 }
 
 Result<DeclarationInfo> ProcessEmitProject(
@@ -393,6 +393,7 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel, const ExtensionSet&
 
       auto scan_options = std::make_shared<dataset::ScanOptions>();
       scan_options->use_threads = true;
+      scan_options->add_augmented_fields = false;
 
       if (read.has_filter()) {
         ARROW_ASSIGN_OR_RAISE(scan_options->filter,
@@ -1023,7 +1024,7 @@ Result<std::unique_ptr<substrait::ReadRel>> NamedTableRelationConverter(
   }
 
   read_rel->set_allocated_named_table(read_rel_tn.release());
-  return std::move(read_rel);
+  return read_rel;
 }
 
 Result<std::unique_ptr<substrait::ReadRel>> ScanRelationConverter(
@@ -1067,7 +1068,7 @@ Result<std::unique_ptr<substrait::ReadRel>> ScanRelationConverter(
     read_rel_lfs->mutable_items()->AddAllocated(read_rel_lfs_ffs.release());
   }
   read_rel->set_allocated_local_files(read_rel_lfs.release());
-  return std::move(read_rel);
+  return read_rel;
 }
 
 Result<std::unique_ptr<substrait::FilterRel>> FilterRelationConverter(
@@ -1096,7 +1097,7 @@ Result<std::unique_ptr<substrait::FilterRel>> FilterRelationConverter(
   ARROW_ASSIGN_OR_RAISE(auto subs_expr,
                         ToProto(bound_expression, ext_set, conversion_options));
   filter_rel->set_allocated_condition(subs_expr.release());
-  return std::move(filter_rel);
+  return filter_rel;
 }
 
 }  // namespace
@@ -1145,7 +1146,7 @@ Result<std::unique_ptr<substrait::Rel>> ToProto(
     const ConversionOptions& conversion_options) {
   auto rel = std::make_unique<substrait::Rel>();
   RETURN_NOT_OK(SerializeAndCombineRelations(declr, ext_set, &rel, conversion_options));
-  return std::move(rel);
+  return rel;
 }
 
 }  // namespace engine
diff --git a/cpp/src/arrow/engine/substrait/serde.cc b/cpp/src/arrow/engine/substrait/serde.cc
index 9e670f121778e..16d2ace4ac0d7 100644
--- a/cpp/src/arrow/engine/substrait/serde.cc
+++ b/cpp/src/arrow/engine/substrait/serde.cc
@@ -256,7 +256,7 @@ Result<std::shared_ptr<acero::ExecPlan>> MakeSingleDeclarationPlan(
   } else {
     ARROW_ASSIGN_OR_RAISE(auto plan, acero::ExecPlan::Make());
     ARROW_RETURN_NOT_OK(declarations[0].AddToPlan(plan.get()));
-    return std::move(plan);
+    return plan;
   }
 }
diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc
index 3e80192377937..6762d1e045450 100644
--- a/cpp/src/arrow/engine/substrait/serde_test.cc
+++ b/cpp/src/arrow/engine/substrait/serde_test.cc
@@ -1064,6 +1064,86 @@ NamedTableProvider AlwaysProvideSameTable(std::shared_ptr<Table> table) {
   };
 }
 
+TEST(Substrait, ExecReadRelWithLocalFiles) {
+  ASSERT_OK_AND_ASSIGN(std::string dir_string,
+                       arrow::internal::GetEnvVar("PARQUET_TEST_DATA"));
+
+  std::string substrait_json = R"({
+    "relations": [
+      {
+        "root": {
+          "input": {
+            "read": {
+              "common": {
+                "direct": {}
+              },
+              "baseSchema": {
+                "names": [
+                  "f32",
+                  "f64"
+                ],
+                "struct": {
+                  "types": [
+                    {
+                      "fp32": {
+                        "nullability": "NULLABILITY_REQUIRED"
+                      }
+                    },
+                    {
+                      "fp64": {
+                        "nullability": "NULLABILITY_REQUIRED"
+                      }
+                    }
+                  ],
+                  "nullability": "NULLABILITY_REQUIRED"
+                }
+              },
+              "localFiles": {
+                "items": [
+                  {
+                    "uriFile": "file://[DIRECTORY_PLACEHOLDER]/byte_stream_split.zstd.parquet",
+                    "parquet": {}
+                  }
+                ]
+              }
+            }
+          },
+          "names": [
+            "f32",
+            "f64"
+          ]
+        }
+      }
+    ],
+    "version": {
+      "minorNumber": 42,
+      "producer": "my-producer"
+    }
+  })";
+  const char* placeholder = "[DIRECTORY_PLACEHOLDER]";
+  substrait_json.replace(substrait_json.find(placeholder), strlen(placeholder),
+                         dir_string);
+
+  ASSERT_OK_AND_ASSIGN(auto buf,
+                       internal::SubstraitFromJSON("Plan", substrait_json,
+                                                   /*ignore_unknown_fields=*/false));
+
+  ASSERT_OK_AND_ASSIGN(auto declarations,
+                       DeserializePlans(*buf, acero::NullSinkNodeConsumer::Make));
+  ASSERT_EQ(declarations.size(), 1);
+  acero::Declaration* decl = &declarations[0];
+  ASSERT_EQ(decl->factory_name, "consuming_sink");
+  ASSERT_OK_AND_ASSIGN(auto plan, acero::ExecPlan::Make());
+  ASSERT_OK_AND_ASSIGN(auto sink_node, declarations[0].AddToPlan(plan.get()));
+  ASSERT_STREQ(sink_node->kind_name(), "ConsumingSinkNode");
+  ASSERT_EQ(sink_node->num_inputs(), 1);
+  auto& prev_node = sink_node->inputs()[0];
+  ASSERT_STREQ(prev_node->kind_name(), "SourceNode");
+
+  plan->StartProducing();
+  ASSERT_FINISHES_OK(plan->finished());
+}
+
 TEST(Substrait, RelWithHint) {
   ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Rel", R"({
@@ -2443,6 +2523,7 @@ TEST(SubstraitRoundTrip, BasicPlanEndToEnd) {
 
   auto scan_options = std::make_shared<dataset::ScanOptions>();
   scan_options->projection = compute::project({}, {});
+  scan_options->add_augmented_fields = false;
   const std::string filter_col_left = "shared";
   const std::string filter_col_right = "distinct";
   auto comp_left_value = compute::field_ref(filter_col_left);
diff --git a/cpp/src/arrow/engine/substrait/type_internal.cc b/cpp/src/arrow/engine/substrait/type_internal.cc
index 5e7e364fe00c5..b469f5fa0baab 100644
--- a/cpp/src/arrow/engine/substrait/type_internal.cc
+++ b/cpp/src/arrow/engine/substrait/type_internal.cc
@@ -506,7 +506,7 @@ Result<std::unique_ptr<substrait::Type>> ToProto(
   auto out = std::make_unique<substrait::Type>();
   RETURN_NOT_OK(
       (DataTypeToProtoImpl{out.get(), nullable, ext_set, conversion_options})(type));
-  return std::move(out);
+  return out;
 }
 
 Result<std::shared_ptr<Schema>> FromProto(const substrait::NamedStruct& named_struct,
@@ -583,7 +583,7 @@ Result<std::unique_ptr<substrait::NamedStruct>> ToProto(
   }
 
   named_struct->set_allocated_struct_(struct_.release());
-  return std::move(named_struct);
+  return named_struct;
 }
 
 }  // namespace engine
diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.cc b/cpp/src/arrow/extension/fixed_shape_tensor.cc
index 1101b08307332..944a134a707b1 100644
--- a/cpp/src/arrow/extension/fixed_shape_tensor.cc
+++ b/cpp/src/arrow/extension/fixed_shape_tensor.cc
@@ -207,44 +207,44 @@ std::shared_ptr<Array> FixedShapeTensorType::MakeArray(
 
 Result<std::shared_ptr<Tensor>> FixedShapeTensorType::MakeTensor(
     const std::shared_ptr<ExtensionScalar>& scalar) {
-  const auto ext_scalar = internal::checked_pointer_cast<ExtensionScalar>(scalar);
-  const auto ext_type =
-      internal::checked_pointer_cast<FixedShapeTensorType>(scalar->type);
-  if (!is_fixed_width(*ext_type->value_type())) {
+  const auto& ext_scalar = internal::checked_cast<const ExtensionScalar&>(*scalar);
+  const auto& ext_type =
+      internal::checked_cast<const FixedShapeTensorType&>(*scalar->type);
+  if (!is_fixed_width(*ext_type.value_type())) {
     return Status::TypeError("Cannot convert non-fixed-width values to Tensor.");
   }
-  const auto array =
-      internal::checked_pointer_cast<FixedSizeListScalar>(ext_scalar->value)->value;
+  const auto& array =
+      internal::checked_cast<const FixedSizeListScalar*>(ext_scalar.value.get())->value;
   if (array->null_count() > 0) {
     return Status::Invalid("Cannot convert data with nulls to Tensor.");
   }
-  const auto value_type =
-      internal::checked_pointer_cast<FixedWidthType>(ext_type->value_type());
-  const auto byte_width = value_type->byte_width();
+  const auto& value_type =
+      internal::checked_cast<const FixedWidthType&>(*ext_type.value_type());
+  const auto byte_width = value_type.byte_width();
 
-  std::vector<int64_t> permutation = ext_type->permutation();
+  std::vector<int64_t> permutation = ext_type.permutation();
   if (permutation.empty()) {
-    permutation.resize(ext_type->ndim());
+    permutation.resize(ext_type.ndim());
     std::iota(permutation.begin(), permutation.end(), 0);
   }
 
-  std::vector<int64_t> shape = ext_type->shape();
+  std::vector<int64_t> shape = ext_type.shape();
   internal::Permute(permutation, &shape);
 
-  std::vector<std::string> dim_names = ext_type->dim_names();
+  std::vector<std::string> dim_names = ext_type.dim_names();
   if (!dim_names.empty()) {
     internal::Permute(permutation, &dim_names);
   }
 
   std::vector<int64_t> strides;
-  RETURN_NOT_OK(ComputeStrides(*value_type.get(), shape, permutation, &strides));
+  RETURN_NOT_OK(ComputeStrides(value_type, shape, permutation, &strides));
   const auto start_position = array->offset() * byte_width;
   const auto size = std::accumulate(shape.begin(), shape.end(),
                                     static_cast<int64_t>(1), std::multiplies<>());
   const auto buffer =
       SliceBuffer(array->data()->buffers[1], start_position, size * byte_width);
 
-  return Tensor::Make(ext_type->value_type(), buffer, shape, strides, dim_names);
+  return Tensor::Make(ext_type.value_type(), buffer, shape, strides, dim_names);
 }
 
 Result<std::shared_ptr<FixedShapeTensorArray>> FixedShapeTensorArray::FromTensor(
@@ -257,12 +257,14 @@ Result<std::shared_ptr<FixedShapeTensorArray>> FixedShapeTensorArray::FromTensor
   permutation.erase(permutation.begin());
 
   std::vector<int64_t> cell_shape;
+  cell_shape.reserve(permutation.size());
   for (auto i : permutation) {
     cell_shape.emplace_back(tensor->shape()[i]);
   }
 
   std::vector<std::string> dim_names;
   if (!tensor->dim_names().empty()) {
+    dim_names.reserve(permutation.size());
     for (auto i : permutation) {
       dim_names.emplace_back(tensor->dim_names()[i]);
     }
@@ -337,9 +339,9 @@ const Result<std::shared_ptr<Tensor>> FixedShapeTensorArray::ToTensor() const {
   // To convert an array of n dimensional tensors to a n+1 dimensional tensor we
   // interpret the array's length as the first dimension of the new tensor.
-  const auto ext_type =
-      internal::checked_pointer_cast<FixedShapeTensorType>(this->type());
-  const auto value_type = ext_type->value_type();
+  const auto& ext_type =
+      internal::checked_cast<const FixedShapeTensorType&>(*this->type());
+  const auto& value_type = ext_type.value_type();
   ARROW_RETURN_IF(
       !is_fixed_width(*value_type),
       Status::TypeError(value_type->ToString(), " is not valid data type for a tensor"));
@@ -350,35 +352,35 @@ const Result<std::shared_ptr<Tensor>> FixedShapeTensorArray::ToTensor() const {
   // will get permutation index 0 and remaining values from ext_type->permutation() need
   // to be shifted to fill the [1, ndim+1) range. Computed permutation will be used to
   // generate the new tensor's shape, strides and dim_names.
- std::vector permutation = ext_type->permutation(); + std::vector permutation = ext_type.permutation(); if (permutation.empty()) { - permutation.resize(ext_type->ndim() + 1); + permutation.resize(ext_type.ndim() + 1); std::iota(permutation.begin(), permutation.end(), 0); } else { - for (auto i = 0; i < static_cast(ext_type->ndim()); i++) { + for (auto i = 0; i < static_cast(ext_type.ndim()); i++) { permutation[i] += 1; } permutation.insert(permutation.begin(), 1, 0); } - std::vector dim_names = ext_type->dim_names(); + std::vector dim_names = ext_type.dim_names(); if (!dim_names.empty()) { dim_names.insert(dim_names.begin(), 1, ""); internal::Permute(permutation, &dim_names); } - std::vector shape = ext_type->shape(); + std::vector shape = ext_type.shape(); auto cell_size = std::accumulate(shape.begin(), shape.end(), static_cast(1), std::multiplies<>()); shape.insert(shape.begin(), 1, this->length()); internal::Permute(permutation, &shape); std::vector tensor_strides; - const auto fw_value_type = internal::checked_pointer_cast(value_type); + const auto* fw_value_type = internal::checked_cast(value_type.get()); ARROW_RETURN_NOT_OK( - ComputeStrides(*fw_value_type.get(), shape, permutation, &tensor_strides)); + ComputeStrides(*fw_value_type, shape, permutation, &tensor_strides)); - const auto raw_buffer = this->storage()->data()->child_data[0]->buffers[1]; + const auto& raw_buffer = this->storage()->data()->child_data[0]->buffers[1]; ARROW_ASSIGN_OR_RAISE( const auto buffer, SliceBufferSafe(raw_buffer, this->offset() * cell_size * value_type->byte_width())); @@ -389,7 +391,7 @@ const Result> FixedShapeTensorArray::ToTensor() const { Result> FixedShapeTensorType::Make( const std::shared_ptr& value_type, const std::vector& shape, const std::vector& permutation, const std::vector& dim_names) { - const auto ndim = shape.size(); + const size_t ndim = shape.size(); if (!permutation.empty() && ndim != permutation.size()) { return Status::Invalid("permutation size must match shape size. 
Expected: ", ndim, " Got: ", permutation.size()); @@ -402,18 +404,18 @@ Result> FixedShapeTensorType::Make( RETURN_NOT_OK(internal::IsPermutationValid(permutation)); } - const auto size = std::accumulate(shape.begin(), shape.end(), static_cast(1), - std::multiplies<>()); + const int64_t size = std::accumulate(shape.begin(), shape.end(), + static_cast(1), std::multiplies<>()); return std::make_shared(value_type, static_cast(size), shape, permutation, dim_names); } const std::vector& FixedShapeTensorType::strides() { if (strides_.empty()) { - auto value_type = internal::checked_pointer_cast(this->value_type_); + auto value_type = internal::checked_cast(this->value_type_.get()); std::vector tensor_strides; - ARROW_CHECK_OK(ComputeStrides(*value_type.get(), this->shape(), this->permutation(), - &tensor_strides)); + ARROW_CHECK_OK( + ComputeStrides(*value_type, this->shape(), this->permutation(), &tensor_strides)); strides_ = tensor_strides; } return strides_; diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.h b/cpp/src/arrow/extension/fixed_shape_tensor.h index 3fec79b5c2a3c..20ec20a64c2d4 100644 --- a/cpp/src/arrow/extension/fixed_shape_tensor.h +++ b/cpp/src/arrow/extension/fixed_shape_tensor.h @@ -67,10 +67,10 @@ class ARROW_EXPORT FixedShapeTensorType : public ExtensionType { size_t ndim() const { return shape_.size(); } /// Shape of tensor elements - const std::vector shape() const { return shape_; } + const std::vector& shape() const { return shape_; } /// Value type of tensor elements - const std::shared_ptr value_type() const { return value_type_; } + const std::shared_ptr& value_type() const { return value_type_; } /// Strides of tensor elements. Strides state offset in bytes between adjacent /// elements along each dimension. In case permutation is non-empty strides are diff --git a/cpp/src/arrow/field_ref_test.cc b/cpp/src/arrow/field_ref_test.cc index 0cb2da4f709a1..25c8aa0b71623 100644 --- a/cpp/src/arrow/field_ref_test.cc +++ b/cpp/src/arrow/field_ref_test.cc @@ -199,7 +199,7 @@ struct FieldPathTestCase { Table::Make(out.schema, {out.v0.chunked_array, out.v1.chunked_array}, kNumRows); ARROW_RETURN_NOT_OK(out.table->ValidateFull()); - return std::move(out); + return out; } private: diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index ac563b134586e..809aef32b3623 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -117,8 +117,12 @@ Status AzureOptions::ExtractFromUriQuery(const Uri& uri) { credential_kind = CredentialKind::kDefault; } else if (kv.second == "anonymous") { credential_kind = CredentialKind::kAnonymous; + } else if (kv.second == "cli") { + credential_kind = CredentialKind::kCLI; } else if (kv.second == "workload_identity") { credential_kind = CredentialKind::kWorkloadIdentity; + } else if (kv.second == "environment") { + credential_kind = CredentialKind::kEnvironment; } else { // Other credential kinds should be inferred from the given // parameters automatically. 
@@ -168,9 +172,15 @@ Status AzureOptions::ExtractFromUriQuery(const Uri& uri) { case CredentialKind::kAnonymous: RETURN_NOT_OK(ConfigureAnonymousCredential()); break; + case CredentialKind::kCLI: + RETURN_NOT_OK(ConfigureCLICredential()); + break; case CredentialKind::kWorkloadIdentity: RETURN_NOT_OK(ConfigureWorkloadIdentityCredential()); break; + case CredentialKind::kEnvironment: + RETURN_NOT_OK(ConfigureEnvironmentCredential()); + break; default: // Default credential break; @@ -250,8 +260,10 @@ bool AzureOptions::Equals(const AzureOptions& other) const { return storage_shared_key_credential_->AccountName == other.storage_shared_key_credential_->AccountName; case CredentialKind::kClientSecret: + case CredentialKind::kCLI: case CredentialKind::kManagedIdentity: case CredentialKind::kWorkloadIdentity: + case CredentialKind::kEnvironment: return token_credential_->GetCredentialName() == other.token_credential_->GetCredentialName(); } @@ -331,12 +343,24 @@ Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& clien return Status::OK(); } +Status AzureOptions::ConfigureCLICredential() { + credential_kind_ = CredentialKind::kCLI; + token_credential_ = std::make_shared(); + return Status::OK(); +} + Status AzureOptions::ConfigureWorkloadIdentityCredential() { credential_kind_ = CredentialKind::kWorkloadIdentity; token_credential_ = std::make_shared(); return Status::OK(); } +Status AzureOptions::ConfigureEnvironmentCredential() { + credential_kind_ = CredentialKind::kEnvironment; + token_credential_ = std::make_shared(); + return Status::OK(); +} + Result> AzureOptions::MakeBlobServiceClient() const { if (account_name.empty()) { @@ -352,7 +376,9 @@ Result> AzureOptions::MakeBlobServiceC [[fallthrough]]; case CredentialKind::kClientSecret: case CredentialKind::kManagedIdentity: + case CredentialKind::kCLI: case CredentialKind::kWorkloadIdentity: + case CredentialKind::kEnvironment: return std::make_unique(AccountBlobUrl(account_name), token_credential_); case CredentialKind::kStorageSharedKey: @@ -378,7 +404,9 @@ AzureOptions::MakeDataLakeServiceClient() const { [[fallthrough]]; case CredentialKind::kClientSecret: case CredentialKind::kManagedIdentity: + case CredentialKind::kCLI: case CredentialKind::kWorkloadIdentity: + case CredentialKind::kEnvironment: return std::make_unique( AccountDfsUrl(account_name), token_credential_); case CredentialKind::kStorageSharedKey: @@ -845,7 +873,7 @@ class ObjectInputFile final : public io::RandomAccessFile { DCHECK_LE(bytes_read, nbytes); RETURN_NOT_OK(buffer->Resize(bytes_read)); } - return std::move(buffer); + return buffer; } Result Read(int64_t nbytes, void* out) override { @@ -857,7 +885,7 @@ class ObjectInputFile final : public io::RandomAccessFile { Result> Read(int64_t nbytes) override { ARROW_ASSIGN_OR_RAISE(auto buffer, ReadAt(pos_, nbytes)); pos_ += buffer->size(); - return std::move(buffer); + return buffer; } private: diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 667b4e372ae59..93d6ec2f945b4 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -119,7 +119,9 @@ struct ARROW_EXPORT AzureOptions { kStorageSharedKey, kClientSecret, kManagedIdentity, + kCLI, kWorkloadIdentity, + kEnvironment, } credential_kind_ = CredentialKind::kDefault; std::shared_ptr @@ -141,18 +143,14 @@ struct ARROW_EXPORT AzureOptions { /// /// 1. abfs[s]://[:\@]\.blob.core.windows.net /// [/\[/\]] - /// 2. abfs[s]://\[:\]@\.dfs.core.windows.net - /// [/path] + /// 2. 
abfs[s]://\[:\]\@\.dfs.core.windows.net[/path] /// 3. abfs[s]://[\]@]\[\<:port\>] /// [/\[/path]] /// 4. abfs[s]://[\]@]\[/path] /// - /// 1. and 2. are compatible with the Azure Data Lake Storage Gen2 URIs: - /// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri - /// - /// 3. is for Azure Blob Storage compatible service including Azurite. - /// - /// 4. is a shorter version of 1. and 2. + /// (1) and (2) are compatible with the Azure Data Lake Storage Gen2 URIs + /// [1], (3) is for Azure Blob Storage compatible services including Azurite, + /// and (4) is a shorter version of (1) and (2). /// /// Note that there is no difference between abfs and abfss. HTTPS is /// used with abfs by default. You can force HTTP by specifying @@ -163,12 +161,15 @@ struct ARROW_EXPORT AzureOptions { /// * blob_storage_authority: Set AzureOptions::blob_storage_authority /// * dfs_storage_authority: Set AzureOptions::dfs_storage_authority /// * enable_tls: If it's "false" or "0", HTTP not HTTPS is used. - /// * credential_kind: One of "default", "anonymous", - /// "workload_identity". If "default" is specified, it's just - /// ignored. If "anonymous" is specified, + /// * credential_kind: One of "default", "anonymous", "workload_identity", + /// "environment" or "cli". If "default" is specified, it's + /// just ignored. If "anonymous" is specified, /// AzureOptions::ConfigureAnonymousCredential() is called. If /// "workload_identity" is specified, - /// AzureOptions::ConfigureWorkloadIdentityCredential() is called. + /// AzureOptions::ConfigureWorkloadIdentityCredential() is called. If + /// "environment" is specified, + /// AzureOptions::ConfigureEnvironmentCredential() is called. If "cli" is + /// specified, AzureOptions::ConfigureCLICredential() is called. /// * tenant_id: You must specify "client_id" and "client_secret" /// too. AzureOptions::ConfigureClientSecretCredential() is called. /// * client_id: If you don't specify "tenant_id" and @@ -178,6 +179,9 @@ /// AzureOptions::ConfigureClientSecretCredential() is called. /// * client_secret: You must specify "tenant_id" and "client_id" /// too. AzureOptions::ConfigureClientSecretCredential() is called. + /// + /// [1]: + /// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri static Result FromUri(const Uri& uri, std::string* out_path); static Result FromUri(const std::string& uri, std::string* out_path); @@ -188,7 +192,9 @@ const std::string& client_id, const std::string& client_secret); Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string()); + Status ConfigureCLICredential(); Status ConfigureWorkloadIdentityCredential(); + Status ConfigureEnvironmentCredential(); bool Equals(const AzureOptions& other) const; @@ -226,7 +232,7 @@ struct ARROW_EXPORT AzureOptions { /// overwriting. /// - When you use the ListBlobs operation without specifying a delimiter, the results /// include both directories and blobs. If you choose to use a delimiter, use only a -/// forward slash (/) -- the only supported delimiter. +/// forward slash (/) \--- the only supported delimiter. /// - If you use the DeleteBlob API to delete a directory, that directory is deleted only /// if it's empty. This means that you can't use the Blob API to delete directories /// recursively.
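For reviewers, a minimal usage sketch of the credential kinds added above (illustrative only, not part of the patch; the account and container names are placeholders):

#include "arrow/filesystem/azurefs.h"
#include "arrow/result.h"

arrow::Result<std::shared_ptr<arrow::fs::AzureFileSystem>> MakeCliCredentialFs() {
  // Select the Azure CLI credential through the new URI query parameter...
  std::string path;
  ARROW_ASSIGN_OR_RAISE(
      auto options,
      arrow::fs::AzureOptions::FromUri(
          "abfs://account.blob.core.windows.net/container?credential_kind=cli", &path));
  // ...or, equivalently, configure an AzureOptions instance directly:
  //   options.account_name = "account";
  //   ARROW_RETURN_NOT_OK(options.ConfigureCLICredential());
  // "credential_kind=environment" / ConfigureEnvironmentCredential() work the same way.
  return arrow::fs::AzureFileSystem::Make(options);
}
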
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index ed09bfc2fadd7..05ff3e551cd81 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -521,6 +521,13 @@ TEST(AzureFileSystem, InitializeWithManagedIdentityCredential) { EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options)); } +TEST(AzureFileSystem, InitializeWithCLICredential) { + AzureOptions options; + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(options.ConfigureCLICredential()); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); +} + TEST(AzureFileSystem, InitializeWithWorkloadIdentityCredential) { AzureOptions options; options.account_name = "dummy-account-name"; @@ -528,6 +535,13 @@ TEST(AzureFileSystem, InitializeWithWorkloadIdentityCredential) { EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); } +TEST(AzureFileSystem, InitializeWithEnvironmentCredential) { + AzureOptions options; + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(options.ConfigureEnvironmentCredential()); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); +} + TEST(AzureFileSystem, OptionsCompare) { AzureOptions options; EXPECT_TRUE(options.Equals(options)); @@ -660,6 +674,15 @@ class TestAzureOptions : public ::testing::Test { ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kManagedIdentity); } + void TestFromUriCredentialCLI() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" + "credential_kind=cli", + nullptr)); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kCLI); + } + void TestFromUriCredentialWorkloadIdentity() { ASSERT_OK_AND_ASSIGN( auto options, @@ -669,6 +692,15 @@ class TestAzureOptions : public ::testing::Test { ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kWorkloadIdentity); } + void TestFromUriCredentialEnvironment() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" + "credential_kind=environment", + nullptr)); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kEnvironment); + } + void TestFromUriCredentialInvalid() { ASSERT_RAISES(Invalid, AzureOptions::FromUri( "abfs://file_system@account.dfs.core.windows.net/dir/file?" 
@@ -717,9 +749,13 @@ TEST_F(TestAzureOptions, FromUriCredentialClientSecret) { TEST_F(TestAzureOptions, FromUriCredentialManagedIdentity) { TestFromUriCredentialManagedIdentity(); } +TEST_F(TestAzureOptions, FromUriCredentialCLI) { TestFromUriCredentialCLI(); } TEST_F(TestAzureOptions, FromUriCredentialWorkloadIdentity) { TestFromUriCredentialWorkloadIdentity(); } +TEST_F(TestAzureOptions, FromUriCredentialEnvironment) { + TestFromUriCredentialEnvironment(); +} TEST_F(TestAzureOptions, FromUriCredentialInvalid) { TestFromUriCredentialInvalid(); } TEST_F(TestAzureOptions, FromUriBlobStorageAuthority) { TestFromUriBlobStorageAuthority(); diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index b79af08385c0c..284be685fa800 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -761,8 +761,8 @@ class FileSystemFactoryRegistry { RETURN_NOT_OK(CheckValid()); auto [it, success] = scheme_to_factory_.emplace( - std::move(scheme), Registered{std::move(factory), std::move(finalizer)}); - if (success) { + std::move(scheme), Registered{factory, std::move(finalizer)}); + if (success || (it->second.ok() && it->second->factory == factory)) { return Status::OK(); } diff --git a/cpp/src/arrow/filesystem/localfs.cc b/cpp/src/arrow/filesystem/localfs.cc index 25ac04b758f9b..22d802d8f9f7f 100644 --- a/cpp/src/arrow/filesystem/localfs.cc +++ b/cpp/src/arrow/filesystem/localfs.cc @@ -524,7 +524,7 @@ class AsyncStatSelector { ARROW_ASSIGN_OR_RAISE( auto gen, MakeBackgroundGenerator(Iterator(DiscoveryImplIterator( - std::move(dir_fn), nesting_depth, std::move(selector), + dir_fn, nesting_depth, std::move(selector), discovery_state, io_context, file_info_batch_size)), io_context.executor())); gen = MakeTransferredGenerator(std::move(gen), io_context.executor()); diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 1a20e44bc36e2..6dd7a8c75586c 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -154,15 +154,16 @@ TEST(FileSystemFromUri, RuntimeRegisteredFactory) { EXPECT_THAT(FileSystemFromUri("slowfile2:///hey/yo", &path), Raises(StatusCode::Invalid)); - EXPECT_THAT(RegisterFileSystemFactory("slowfile2", {SlowFileSystemFactory, "", 0}), - Ok()); + EXPECT_THAT( + RegisterFileSystemFactory("slowfile2", {SlowFileSystemFactory, __FILE__, __LINE__}), + Ok()); ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("slowfile2:///hey/yo", &path)); EXPECT_EQ(path, "/hey/yo"); EXPECT_EQ(fs->type_name(), "slow"); EXPECT_THAT( - RegisterFileSystemFactory("slowfile2", {SlowFileSystemFactory, "", 0}), + RegisterFileSystemFactory("slowfile2", {SlowFileSystemFactory, __FILE__, __LINE__}), Raises(StatusCode::KeyError, testing::HasSubstr("Attempted to register factory for scheme 'slowfile2' " "but that scheme is already registered"))); @@ -542,7 +543,7 @@ struct DirTreeCreator { Result Create(const std::string& base) { FileInfoVector infos; RETURN_NOT_OK(Create(base, 0, &infos)); - return std::move(infos); + return infos; } Status Create(const std::string& base, int depth, FileInfoVector* infos) { diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 640888e1c4fa5..c456be2d0d3cd 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -601,44 +601,81 @@ class S3Client : public Aws::S3::S3Client { public: using Aws::S3::S3Client::S3Client; + static inline constexpr auto 
kBucketRegionHeaderName = "x-amz-bucket-region"; + + std::string GetBucketRegionFromHeaders( + const Aws::Http::HeaderValueCollection& headers) { + const auto it = headers.find(ToAwsString(kBucketRegionHeaderName)); + if (it != headers.end()) { + return std::string(FromAwsString(it->second)); + } + return std::string(); + } + + template + Result GetBucketRegionFromError( + const std::string& bucket, const Aws::Client::AWSError& error) { + std::string region = GetBucketRegionFromHeaders(error.GetResponseHeaders()); + if (!region.empty()) { + return region; + } else if (error.GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) { + return Status::IOError("Bucket '", bucket, "' not found"); + } else { + return ErrorToStatus( + std::forward_as_tuple("When resolving region for bucket '", bucket, "': "), + "HeadBucket", error); + } + } + +#if ARROW_AWS_SDK_VERSION_CHECK(1, 11, 212) + // HeadBucketResult::GetBucketRegion appeared in AWS SDK 1.11.212 + Result GetBucketRegion(const std::string& bucket, + const S3Model::HeadBucketRequest& request) { + auto outcome = this->HeadBucket(request); + if (!outcome.IsSuccess()) { + return GetBucketRegionFromError(bucket, outcome.GetError()); + } + auto&& region = std::move(outcome).GetResult().GetBucketRegion(); + if (region.empty()) { + return Status::IOError("When resolving region for bucket '", request.GetBucket(), + "': missing 'x-amz-bucket-region' header in response"); + } + return region; + } +#else // To get a bucket's region, we must extract the "x-amz-bucket-region" header // from the response to a HEAD bucket request. // Unfortunately, the S3Client APIs don't let us access the headers of successful // responses. So we have to cook an AWS request and issue it ourselves. - - Result GetBucketRegion(const S3Model::HeadBucketRequest& request) { + Result GetBucketRegion(const std::string& bucket, + const S3Model::HeadBucketRequest& request) { auto uri = GeneratePresignedUrl(request.GetBucket(), /*key=*/"", Aws::Http::HttpMethod::HTTP_HEAD); // NOTE: The signer region argument isn't passed here, as there's no easy // way of computing it (the relevant method is private). auto outcome = MakeRequest(uri, request, Aws::Http::HttpMethod::HTTP_HEAD, Aws::Auth::SIGV4_SIGNER); - const auto code = outcome.IsSuccess() ? outcome.GetResult().GetResponseCode() - : outcome.GetError().GetResponseCode(); - const auto& headers = outcome.IsSuccess() - ?
outcome.GetResult().GetHeaderValueCollection() - : outcome.GetError().GetResponseHeaders(); - - const auto it = headers.find(ToAwsString("x-amz-bucket-region")); - if (it == headers.end()) { - if (code == Aws::Http::HttpResponseCode::NOT_FOUND) { - return Status::IOError("Bucket '", request.GetBucket(), "' not found"); - } else if (!outcome.IsSuccess()) { - return ErrorToStatus(std::forward_as_tuple("When resolving region for bucket '", - request.GetBucket(), "': "), - "HeadBucket", outcome.GetError()); - } else { - return Status::IOError("When resolving region for bucket '", request.GetBucket(), - "': missing 'x-amz-bucket-region' header in response"); - } + if (!outcome.IsSuccess()) { + return GetBucketRegionFromError(bucket, outcome.GetError()); + } + std::string region = + GetBucketRegionFromHeaders(outcome.GetResult().GetHeaderValueCollection()); + if (!region.empty()) { + return region; + } else if (outcome.GetResult().GetResponseCode() == + Aws::Http::HttpResponseCode::NOT_FOUND) { + return Status::IOError("Bucket '", request.GetBucket(), "' not found"); + } else { + return Status::IOError("When resolving region for bucket '", request.GetBucket(), + "': missing 'x-amz-bucket-region' header in response"); } - return std::string(FromAwsString(it->second)); } +#endif Result GetBucketRegion(const std::string& bucket) { S3Model::HeadBucketRequest req; req.SetBucket(ToAwsString(bucket)); - return GetBucketRegion(req); + return GetBucketRegion(bucket, req); } S3Model::CompleteMultipartUploadOutcome CompleteMultipartUploadWithErrorFixup( @@ -1510,7 +1547,8 @@ class ObjectInputFile final : public io::RandomAccessFile { DCHECK_LE(bytes_read, nbytes); RETURN_NOT_OK(buf->Resize(bytes_read)); } - return std::move(buf); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buf)); } Result Read(int64_t nbytes, void* out) override { @@ -1522,7 +1560,7 @@ class ObjectInputFile final : public io::RandomAccessFile { Result> Read(int64_t nbytes) override { ARROW_ASSIGN_OR_RAISE(auto buffer, ReadAt(pos_, nbytes)); pos_ += buffer->size(); - return std::move(buffer); + return buffer; } protected: @@ -2859,6 +2897,7 @@ Status S3FileSystem::CreateDir(const std::string& s, bool recursive) { return impl_->CreateBucket(path.bucket); } + FileInfo file_info; // Create object if (recursive) { // Ensure bucket exists @@ -2866,10 +2905,33 @@ Status S3FileSystem::CreateDir(const std::string& s, bool recursive) { if (!bucket_exists) { RETURN_NOT_OK(impl_->CreateBucket(path.bucket)); } + + auto key_i = path.key_parts.begin(); + std::string parent_key{}; + if (options().check_directory_existence_before_creation) { + // Walk up the directory first to find the first existing parent + for (const auto& part : path.key_parts) { + parent_key += part; + parent_key += kSep; + } + for (key_i = path.key_parts.end(); key_i-- != path.key_parts.begin();) { + ARROW_ASSIGN_OR_RAISE(file_info, + this->GetFileInfo(path.bucket + kSep + parent_key)); + if (file_info.type() != FileType::NotFound) { + // Found! 
+ break; + } else { + // Remove the trailing kSep and this path part + parent_key.pop_back(); + parent_key.erase(parent_key.end() - key_i->size(), parent_key.end()); + } + } + key_i++; // The backward loop above stops one position before the first missing part + } // Ensure that all parents exist, then the directory itself - std::string parent_key; - for (const auto& part : path.key_parts) { - parent_key += part; + // Create all missing directories + for (; key_i < path.key_parts.end(); ++key_i) { + parent_key += *key_i; parent_key += kSep; RETURN_NOT_OK(impl_->CreateEmptyDir(path.bucket, parent_key)); } @@ -2887,11 +2949,18 @@ Status S3FileSystem::CreateDir(const std::string& s, bool recursive) { "': parent directory does not exist"); } } + } - // XXX Should we check that no non-directory entry exists? - // Minio does it for us, not sure about other S3 implementations. - return impl_->CreateEmptyDir(path.bucket, path.key); + // Check if the directory exists already + if (options().check_directory_existence_before_creation) { + ARROW_ASSIGN_OR_RAISE(file_info, this->GetFileInfo(path.full_path)); + if (file_info.type() != FileType::NotFound) { + return Status::OK(); + } } + // XXX Should we check that no non-directory entry exists? + // Minio does it for us, not sure about other S3 implementations. + return impl_->CreateEmptyDir(path.bucket, path.key); } Status S3FileSystem::DeleteDir(const std::string& s) { diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h index 82d08bc5ea89a..fbbe9d0b3f42b 100644 --- a/cpp/src/arrow/filesystem/s3fs.h +++ b/cpp/src/arrow/filesystem/s3fs.h @@ -166,6 +166,17 @@ struct ARROW_EXPORT S3Options { /// Whether to allow deletion of buckets bool allow_bucket_deletion = false; + /// Whether to use pessimistic directory creation in the CreateDir function + /// + /// By default, CreateDir will try to create the directory without first checking its + /// existence: attempting the creation and catching the error is cheaper than issuing + /// two dependent I/O calls. + /// However, for key/value storage such as Google Cloud Storage, too many creation + /// calls can breach the rate limit for object mutation operations, with serious + /// consequences. It's also possible that you lack creation access for the parent + /// directory. Set this to true to address these scenarios. + bool check_directory_existence_before_creation = false; + /// \brief Default metadata for OpenOutputStream. /// /// This will be ignored if non-empty metadata is passed to OpenOutputStream.
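A short sketch of how a caller might opt in to the new existence check (illustrative only, not part of the patch; the bucket path is a placeholder and S3 is assumed to be already initialized):

#include "arrow/filesystem/s3fs.h"
#include "arrow/result.h"

arrow::Status CreateDirWithExistenceCheck() {
  arrow::fs::S3Options options = arrow::fs::S3Options::Defaults();
  // Trade an extra GetFileInfo call per level for fewer object mutation
  // requests, which helps against rate limits on key/value stores like GCS.
  options.check_directory_existence_before_creation = true;
  ARROW_ASSIGN_OR_RAISE(auto fs, arrow::fs::S3FileSystem::Make(options));
  return fs->CreateDir("bucket/newdir/newsub", /*recursive=*/true);
}
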
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index 88cc96956e34c..7bfa120eda678 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -922,9 +922,13 @@ TEST_F(TestS3FS, CreateDir) { // New "directory" AssertFileInfo(fs_.get(), "bucket/newdir", FileType::NotFound); - ASSERT_OK(fs_->CreateDir("bucket/newdir")); + ASSERT_OK(fs_->CreateDir("bucket/newdir", /*recursive=*/false)); AssertFileInfo(fs_.get(), "bucket/newdir", FileType::Directory); + // By default CreateDir uses recursive mode; pass recursive=false explicitly here + ASSERT_RAISES(IOError, + fs_->CreateDir("bucket/newdir/newsub/newsubsub", /*recursive=*/false)); + // New "directory", recursive ASSERT_OK(fs_->CreateDir("bucket/newdir/newsub/newsubsub", /*recursive=*/true)); AssertFileInfo(fs_.get(), "bucket/newdir/newsub", FileType::Directory); @@ -939,6 +943,31 @@ // Extraneous slashes ASSERT_RAISES(Invalid, fs_->CreateDir("bucket//somedir")); ASSERT_RAISES(Invalid, fs_->CreateDir("bucket/somedir//newdir")); + + // Check existence before creation + options_.check_directory_existence_before_creation = true; + MakeFileSystem(); + // New "directory" again + AssertFileInfo(fs_.get(), "bucket/checknewdir", FileType::NotFound); + ASSERT_OK(fs_->CreateDir("bucket/checknewdir")); + AssertFileInfo(fs_.get(), "bucket/checknewdir", FileType::Directory); + + ASSERT_RAISES(IOError, fs_->CreateDir("bucket/checknewdir/newsub/newsubsub/newsubsub/", + /*recursive=*/false)); + + // New "directory" again, recursive + ASSERT_OK(fs_->CreateDir("bucket/checknewdir/newsub/newsubsub", /*recursive=*/true)); + AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub", FileType::Directory); + AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub/newsubsub", FileType::Directory); + AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub/newsubsub/newsubsub", + FileType::NotFound); + // Try creation with the same name + ASSERT_OK(fs_->CreateDir("bucket/checknewdir/newsub/newsubsub/newsubsub/", + /*recursive=*/true)); + AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub", FileType::Directory); + AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub/newsubsub", FileType::Directory); + AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub/newsubsub/newsubsub", + FileType::Directory); } TEST_F(TestS3FS, DeleteFile) { diff --git a/cpp/src/arrow/filesystem/util_internal.cc b/cpp/src/arrow/filesystem/util_internal.cc index d69f6c896d08e..be43e14e84337 100644 --- a/cpp/src/arrow/filesystem/util_internal.cc +++ b/cpp/src/arrow/filesystem/util_internal.cc @@ -103,7 +103,7 @@ Result ParseFileSystemUri(const std::string& uri_string) { return status; #endif } - return std::move(uri); + return uri; } #ifdef _WIN32 diff --git a/cpp/src/arrow/flight/client.cc b/cpp/src/arrow/flight/client.cc index 4d4f13a09fb26..58a3ba4ab83e5 100644 --- a/cpp/src/arrow/flight/client.cc +++ b/cpp/src/arrow/flight/client.cc @@ -591,7 +591,7 @@ arrow::Result FlightClient::CancelFlightInfo( ARROW_ASSIGN_OR_RAISE(auto cancel_result, CancelFlightInfoResult::Deserialize( std::string_view(*result->body))); ARROW_RETURN_NOT_OK(stream->Drain()); - return std::move(cancel_result); + return cancel_result; } arrow::Result FlightClient::RenewFlightEndpoint( @@ -603,7 +603,7 @@ arrow::Result FlightClient::RenewFlightEndpoint( ARROW_ASSIGN_OR_RAISE(auto renewed_endpoint, FlightEndpoint::Deserialize(std::string_view(*result->body))); ARROW_RETURN_NOT_OK(stream->Drain()); - return std::move(renewed_endpoint);
+ return renewed_endpoint; } arrow::Result> FlightClient::ListActions( diff --git a/cpp/src/arrow/flight/client.h b/cpp/src/arrow/flight/client.h index 330fa8bad730d..613903108949e 100644 --- a/cpp/src/arrow/flight/client.h +++ b/cpp/src/arrow/flight/client.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -/// \brief Implementation of Flight RPC client. API should be -/// considered experimental for now +/// \brief Implementation of Flight RPC client. #pragma once @@ -177,7 +176,6 @@ class ARROW_FLIGHT_EXPORT FlightMetadataReader { }; /// \brief Client class for Arrow Flight RPC services. -/// API experimental for now class ARROW_FLIGHT_EXPORT FlightClient { public: ~FlightClient(); @@ -275,8 +273,6 @@ class ARROW_FLIGHT_EXPORT FlightClient { /// \param[in] options Per-RPC options /// \param[in] descriptor the dataset request /// \param[in] listener Callbacks for response and RPC completion - /// - /// This API is EXPERIMENTAL. void GetFlightInfoAsync(const FlightCallOptions& options, const FlightDescriptor& descriptor, std::shared_ptr> listener); @@ -288,8 +284,6 @@ class ARROW_FLIGHT_EXPORT FlightClient { /// \brief Asynchronous GetFlightInfo returning a Future. /// \param[in] options Per-RPC options /// \param[in] descriptor the dataset request - /// - /// This API is EXPERIMENTAL. arrow::Future GetFlightInfoAsync(const FlightCallOptions& options, const FlightDescriptor& descriptor); arrow::Future GetFlightInfoAsync(const FlightDescriptor& descriptor) { diff --git a/cpp/src/arrow/flight/cookie_internal.cc b/cpp/src/arrow/flight/cookie_internal.cc index 8f41106ebce5c..75a10d148bf47 100644 --- a/cpp/src/arrow/flight/cookie_internal.cc +++ b/cpp/src/arrow/flight/cookie_internal.cc @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces for defining middleware for Flight clients. Currently -// experimental. +// Interfaces for defining middleware for Flight clients. #include "arrow/flight/cookie_internal.h" #include "arrow/flight/client.h" diff --git a/cpp/src/arrow/flight/flight_benchmark.cc b/cpp/src/arrow/flight/flight_benchmark.cc index f53b1c6dcea30..057ef15c3c7ae 100644 --- a/cpp/src/arrow/flight/flight_benchmark.cc +++ b/cpp/src/arrow/flight/flight_benchmark.cc @@ -131,7 +131,8 @@ struct PerformanceStats { Status WaitForReady(FlightClient* client, const FlightCallOptions& call_options) { Action action{"ping", nullptr}; for (int attempt = 0; attempt < 10; attempt++) { - if (client->DoAction(call_options, action).ok()) { + auto result_stream_result = client->DoAction(call_options, action); + if (result_stream_result.ok() && (*result_stream_result)->Drain().ok()) { return Status::OK(); } std::this_thread::sleep_for(std::chrono::milliseconds(1000)); diff --git a/cpp/src/arrow/flight/middleware.h b/cpp/src/arrow/flight/middleware.h index 84448097ff019..d717e396a8b68 100644 --- a/cpp/src/arrow/flight/middleware.h +++ b/cpp/src/arrow/flight/middleware.h @@ -16,7 +16,7 @@ // under the License. // Interfaces for defining middleware for Flight clients and -// servers. Currently experimental. +// servers. #pragma once diff --git a/cpp/src/arrow/flight/server.h b/cpp/src/arrow/flight/server.h index ffcffe12e3c78..8d73353ab16c1 100644 --- a/cpp/src/arrow/flight/server.h +++ b/cpp/src/arrow/flight/server.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces to use for defining Flight RPC servers. 
API should be considered -// experimental for now +// Interfaces to use for defining Flight RPC servers. #pragma once diff --git a/cpp/src/arrow/flight/server_middleware.h b/cpp/src/arrow/flight/server_middleware.h index 030f1a17c2100..3a3e6f8616ed6 100644 --- a/cpp/src/arrow/flight/server_middleware.h +++ b/cpp/src/arrow/flight/server_middleware.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces for defining middleware for Flight servers. Currently -// experimental. +// Interfaces for defining middleware for Flight servers. #pragma once diff --git a/cpp/src/arrow/flight/sql/client.cc b/cpp/src/arrow/flight/sql/client.cc index 37b6a0b32e45d..86fd4868bad2d 100644 --- a/cpp/src/arrow/flight/sql/client.cc +++ b/cpp/src/arrow/flight/sql/client.cc @@ -682,7 +682,7 @@ arrow::Result> PreparedStatement::Execute( parameter_binding_.get())); } ARROW_ASSIGN_OR_RAISE(auto flight_info, client_->GetFlightInfo(options, descriptor)); - return std::move(flight_info); + return flight_info; } arrow::Result PreparedStatement::ExecuteUpdate( diff --git a/cpp/src/arrow/flight/sql/server.cc b/cpp/src/arrow/flight/sql/server.cc index cae3542b4faf8..63d1f5c5225fa 100644 --- a/cpp/src/arrow/flight/sql/server.cc +++ b/cpp/src/arrow/flight/sql/server.cc @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces to use for defining Flight RPC servers. API should be considered -// experimental for now +// Interfaces to use for defining Flight RPC servers. // Platform-specific defines #include "arrow/flight/platform.h" diff --git a/cpp/src/arrow/flight/sql/server.h b/cpp/src/arrow/flight/sql/server.h index 7b5d71678f3de..7130e96987b89 100644 --- a/cpp/src/arrow/flight/sql/server.h +++ b/cpp/src/arrow/flight/sql/server.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces to use for defining Flight RPC servers. API should be considered -// experimental for now +// Interfaces to use for defining Flight RPC servers. #pragma once diff --git a/cpp/src/arrow/flight/sql/server_session_middleware.h b/cpp/src/arrow/flight/sql/server_session_middleware.h index 021793de3de32..6eb11041a08bd 100644 --- a/cpp/src/arrow/flight/sql/server_session_middleware.h +++ b/cpp/src/arrow/flight/sql/server_session_middleware.h @@ -16,7 +16,6 @@ // under the License. // Middleware for handling Flight SQL Sessions including session cookie handling. -// Currently experimental. #pragma once diff --git a/cpp/src/arrow/flight/transport.h b/cpp/src/arrow/flight/transport.h index 4029aa5223deb..4ce50534023fc 100644 --- a/cpp/src/arrow/flight/transport.h +++ b/cpp/src/arrow/flight/transport.h @@ -19,8 +19,6 @@ /// Internal (but not private) interface for implementing /// alternate network transports in Flight. /// -/// \warning EXPERIMENTAL. Subject to change. -/// /// To implement a transport, implement ServerTransport and /// ClientTransport, and register the desired URI schemes with /// TransportRegistry. Flight takes care of most of the per-RPC @@ -248,8 +246,6 @@ TransportRegistry* GetDefaultTransportRegistry(); /// Transport implementations may subclass this to store their own /// state, and stash an instance in a user-supplied AsyncListener via /// ClientTransport::GetAsyncRpc and ClientTransport::SetAsyncRpc. -/// -/// This API is EXPERIMENTAL. 
class ARROW_FLIGHT_EXPORT AsyncRpc { public: virtual ~AsyncRpc() = default; diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h index b3df8377b8ffd..cdf03f21041ee 100644 --- a/cpp/src/arrow/flight/types.h +++ b/cpp/src/arrow/flight/types.h @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Data structure for Flight RPC. API should be considered experimental for now +// Data structure for Flight RPC. #pragma once @@ -1115,8 +1115,6 @@ std::string ToString(TransportStatusCode code); /// instead of trying to translate to Arrow Status. /// /// Currently, only attached to the Status passed to AsyncListener::OnFinish. -/// -/// This API is EXPERIMENTAL. class ARROW_FLIGHT_EXPORT TransportStatusDetail : public StatusDetail { public: constexpr static const char* kTypeId = "flight::TransportStatusDetail"; diff --git a/cpp/src/arrow/flight/types_async.h b/cpp/src/arrow/flight/types_async.h index a241e64fb4e49..d5ed48d8a6438 100644 --- a/cpp/src/arrow/flight/types_async.h +++ b/cpp/src/arrow/flight/types_async.h @@ -31,8 +31,6 @@ namespace arrow::flight { /// @{ /// \brief Non-templated state for an async RPC. -/// -/// This API is EXPERIMENTAL. class ARROW_FLIGHT_EXPORT AsyncListenerBase { public: AsyncListenerBase(); @@ -57,8 +55,6 @@ class ARROW_FLIGHT_EXPORT AsyncListenerBase { /// A single listener may not be used for multiple concurrent RPC /// calls. The application MUST hold the listener alive until /// OnFinish() is called and has finished. -/// -/// This API is EXPERIMENTAL. template class ARROW_FLIGHT_EXPORT AsyncListener : public AsyncListenerBase { public: diff --git a/cpp/src/arrow/gpu/cuda_memory.cc b/cpp/src/arrow/gpu/cuda_memory.cc index dcf0a31963e45..148de68434272 100644 --- a/cpp/src/arrow/gpu/cuda_memory.cc +++ b/cpp/src/arrow/gpu/cuda_memory.cc @@ -89,7 +89,7 @@ Result> CudaIpcMemHandle::Serialize(MemoryPool* pool) co memcpy(buffer->mutable_data() + sizeof(impl_->memory_size), &impl_->ipc_handle, sizeof(impl_->ipc_handle)); } - return std::move(buffer); + return buffer; } const void* CudaIpcMemHandle::handle() const { return &impl_->ipc_handle; } diff --git a/cpp/src/arrow/integration/json_internal.cc b/cpp/src/arrow/integration/json_internal.cc index 4b75e84bfccb6..89719b4ba4b2e 100644 --- a/cpp/src/arrow/integration/json_internal.cc +++ b/cpp/src/arrow/integration/json_internal.cc @@ -1069,9 +1069,9 @@ Result> GetUnion(const RjObject& json_type, } if (mode == UnionMode::SPARSE) { - return sparse_union(std::move(children), std::move(type_codes)); + return sparse_union(children, std::move(type_codes)); } else { - return dense_union(std::move(children), std::move(type_codes)); + return dense_union(children, std::move(type_codes)); } } diff --git a/cpp/src/arrow/io/buffered.cc b/cpp/src/arrow/io/buffered.cc index 21cce478d3fa5..c53b3d223d4c0 100644 --- a/cpp/src/arrow/io/buffered.cc +++ b/cpp/src/arrow/io/buffered.cc @@ -423,7 +423,8 @@ class BufferedInputStream::Impl : public BufferedBase { RETURN_NOT_OK(buffer->Resize(bytes_read, false /* shrink_to_fit */)); buffer->ZeroPadding(); } - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } // For providing access to the raw file handles diff --git a/cpp/src/arrow/io/compressed.cc b/cpp/src/arrow/io/compressed.cc index 6a6fbf40f9628..306f7c4bf8519 100644 --- a/cpp/src/arrow/io/compressed.cc +++ b/cpp/src/arrow/io/compressed.cc @@ -411,9 +411,8 @@ class 
CompressedInputStream::Impl { ARROW_ASSIGN_OR_RAISE(auto buf, AllocateResizableBuffer(nbytes, pool_)); ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buf->mutable_data())); RETURN_NOT_OK(buf->Resize(bytes_read)); - // Using std::move because some compiler might has issue below: - // https://wg21.cmeerw.net/cwg/issue1579 - return std::move(buf); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buf)); } const std::shared_ptr& raw() const { return raw_; } diff --git a/cpp/src/arrow/io/compressed_test.cc b/cpp/src/arrow/io/compressed_test.cc index bd414149d5345..12d116e3395d4 100644 --- a/cpp/src/arrow/io/compressed_test.cc +++ b/cpp/src/arrow/io/compressed_test.cc @@ -77,7 +77,7 @@ std::shared_ptr CompressDataOneShot(Codec* codec, compressed_len = *codec->Compress(data.size(), data.data(), max_compressed_len, compressed->mutable_data()); ABORT_NOT_OK(compressed->Resize(compressed_len)); - return std::move(compressed); + return compressed; } Status RunCompressedInputStream(Codec* codec, std::shared_ptr compressed, diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc index cc3a5187059e9..a22accf65660a 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -230,7 +230,8 @@ class ReadableFile::ReadableFileImpl : public OSFile { RETURN_NOT_OK(buffer->Resize(bytes_read)); buffer->ZeroPadding(); } - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } Result> ReadBufferAt(int64_t position, int64_t nbytes) { @@ -242,7 +243,8 @@ class ReadableFile::ReadableFileImpl : public OSFile { RETURN_NOT_OK(buffer->Resize(bytes_read)); buffer->ZeroPadding(); } - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } Status WillNeed(const std::vector& ranges) { diff --git a/cpp/src/arrow/io/hdfs.cc b/cpp/src/arrow/io/hdfs.cc index 5d3edcd3ba63a..5fb762d076376 100644 --- a/cpp/src/arrow/io/hdfs.cc +++ b/cpp/src/arrow/io/hdfs.cc @@ -172,7 +172,8 @@ class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl { RETURN_NOT_OK(buffer->Resize(bytes_read)); buffer->ZeroPadding(); } - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } Result Read(int64_t nbytes, void* buffer) { @@ -200,7 +201,8 @@ class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl { if (bytes_read < nbytes) { RETURN_NOT_OK(buffer->Resize(bytes_read)); } - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } Result GetSize() { diff --git a/cpp/src/arrow/io/stdio.cc b/cpp/src/arrow/io/stdio.cc index ac6ca3a63a604..ba4a66a2f340e 100644 --- a/cpp/src/arrow/io/stdio.cc +++ b/cpp/src/arrow/io/stdio.cc @@ -85,7 +85,8 @@ Result> StdinStream::Read(int64_t nbytes) { ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data())); ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false)); buffer->ZeroPadding(); - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } } // namespace io diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 9fd71361d9b76..2fc9b145ccc98 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ 
b/cpp/src/arrow/ipc/CMakeLists.txt @@ -39,6 +39,7 @@ endfunction() add_arrow_test(feather_test) add_arrow_ipc_test(json_simple_test) +add_arrow_ipc_test(message_internal_test) add_arrow_ipc_test(read_write_test) add_arrow_ipc_test(tensor_test) @@ -56,6 +57,7 @@ if(ARROW_BUILD_UTILITIES OR ARROW_BUILD_INTEGRATION) target_link_libraries(arrow-file-to-stream ${ARROW_UTIL_LIB}) add_executable(arrow-stream-to-file stream_to_file.cc) target_link_libraries(arrow-stream-to-file ${ARROW_UTIL_LIB}) + if(ARROW_BUILD_UTILITIES) install(TARGETS arrow-file-to-stream arrow-stream-to-file ${INSTALL_IS_OPTIONAL} DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc index e196dd7bf5389..27ded52861eaf 100644 --- a/cpp/src/arrow/ipc/message.cc +++ b/cpp/src/arrow/ipc/message.cc @@ -130,7 +130,7 @@ Result> Message::Open(std::shared_ptr metadata, std::shared_ptr body) { std::unique_ptr result(new Message(std::move(metadata), std::move(body))); RETURN_NOT_OK(result->impl_->Open()); - return std::move(result); + return result; } Message::~Message() {} @@ -208,7 +208,7 @@ Result> Message::ReadFrom(std::shared_ptr metad " bytes for message body, got ", body->size()); } RETURN_NOT_OK(decoder.Consume(body)); - return std::move(result); + return result; } Result> Message::ReadFrom(const int64_t offset, @@ -225,7 +225,7 @@ Result> Message::ReadFrom(const int64_t offset, " bytes for message body, got ", body->size()); } RETURN_NOT_OK(decoder.Consume(body)); - return std::move(result); + return result; } Status WritePadding(io::OutputStream* stream, int64_t nbytes) { @@ -329,7 +329,7 @@ Result> ReadMessage(std::shared_ptr metadata, case MessageDecoder::State::INITIAL: // Metadata did not request a body so we better not have provided one DCHECK_EQ(body, nullptr); - return std::move(result); + return result; case MessageDecoder::State::METADATA_LENGTH: return Status::Invalid("metadata length is missing from the metadata buffer"); case MessageDecoder::State::METADATA: @@ -338,7 +338,7 @@ Result> ReadMessage(std::shared_ptr metadata, case MessageDecoder::State::BODY: { if (body == nullptr) { // Caller didn't give a body so just give them a message without body - return std::move(result); + return result; } if (body->size() != decoder.next_required_size()) { return Status::IOError("Expected body buffer to be ", @@ -346,7 +346,7 @@ Result> ReadMessage(std::shared_ptr metadata, " bytes for message body, got ", body->size()); } RETURN_NOT_OK(decoder.Consume(body)); - return std::move(result); + return result; } case MessageDecoder::State::EOS: return Status::Invalid("Unexpected empty message in IPC file format"); @@ -376,7 +376,7 @@ Result> ReadMessage(int64_t offset, int32_t metadata_le switch (decoder.state()) { case MessageDecoder::State::INITIAL: - return std::move(result); + return result; case MessageDecoder::State::METADATA_LENGTH: return Status::Invalid("metadata length is missing. 
File offset: ", offset, ", metadata length: ", metadata_length); @@ -401,7 +401,7 @@ Result> ReadMessage(int64_t offset, int32_t metadata_le " bytes for message body, got ", body->size()); } RETURN_NOT_OK(decoder.Consume(body)); - return std::move(result); + return result; } case MessageDecoder::State::EOS: return Status::Invalid("Unexpected empty message in IPC file format"); @@ -551,7 +551,7 @@ Result> ReadMessage(io::InputStream* file, MemoryPool* if (!message) { return nullptr; } else { - return std::move(message); + return message; } } diff --git a/cpp/src/arrow/ipc/message_internal_test.cc b/cpp/src/arrow/ipc/message_internal_test.cc new file mode 100644 index 0000000000000..112240f08d552 --- /dev/null +++ b/cpp/src/arrow/ipc/message_internal_test.cc @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "arrow/buffer.h" +#include "arrow/ipc/dictionary.h" +#include "arrow/ipc/metadata_internal.h" +#include "arrow/ipc/options.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/key_value_metadata.h" + +namespace arrow::ipc::internal { + +using FBB = flatbuffers::FlatBufferBuilder; + +// GH-40361: Test that Flatbuffer serialization matches a known output +// byte-for-byte. +// +// Our Flatbuffers code should not depend on argument evaluation order as it's +// undefined (https://en.cppreference.com/w/cpp/language/eval_order) and may +// lead to unnecessary platform- or toolchain-specific differences in +// serialization. 
+TEST(TestMessageInternal, TestByteIdentical) { + FBB fbb; + flatbuffers::Offset fb_schema; + DictionaryFieldMapper mapper; + + // Create a simple Schema with just two metadata KVPs + auto f0 = field("f0", int64()); + auto f1 = field("f1", int64()); + std::vector> fields = {f0, f1}; + std::shared_ptr metadata = + KeyValueMetadata::Make({"key_1", "key_2"}, {"key_1_value", "key_2_value"}); + auto schema = ::arrow::schema({f0}, metadata); + + // Serialize the Schema to a Buffer + std::shared_ptr out_buffer; + ASSERT_OK( + WriteSchemaMessage(*schema, mapper, IpcWriteOptions::Defaults(), &out_buffer)); + + // This is example output from macOS+ARM+LLVM + const uint8_t expected[] = { + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x0E, 0x00, 0x06, 0x00, 0x05, 0x00, + 0x08, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0A, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0A, 0x00, + 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF, 0x18, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, + 0x32, 0x5F, 0x76, 0x61, 0x6C, 0x75, 0x65, 0x00, 0x05, 0x00, 0x00, 0x00, 0x6B, 0x65, + 0x79, 0x5F, 0x32, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0C, 0x00, 0x04, 0x00, 0x08, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0B, 0x00, + 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x31, 0x5F, 0x76, 0x61, 0x6C, 0x75, 0x65, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x31, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x14, 0x00, 0x08, 0x00, 0x06, 0x00, + 0x07, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x02, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x66, 0x30, 0x00, 0x00, 0x08, 0x00, + 0x0C, 0x00, 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x40, 0x00, 0x00, 0x00}; + Buffer expected_buffer(expected, sizeof(expected)); + + AssertBufferEqual(expected_buffer, *out_buffer); +} +} // namespace arrow::ipc::internal diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc index e20b352d18d95..be8d1ccc35f1a 100644 --- a/cpp/src/arrow/ipc/metadata_internal.cc +++ b/cpp/src/arrow/ipc/metadata_internal.cc @@ -191,11 +191,9 @@ Status UnionFromFlatbuffer(const flatbuf::Union* union_data, } if (mode == UnionMode::SPARSE) { - ARROW_ASSIGN_OR_RAISE( - *out, SparseUnionType::Make(std::move(children), std::move(type_codes))); + ARROW_ASSIGN_OR_RAISE(*out, SparseUnionType::Make(children, std::move(type_codes))); } else { - ARROW_ASSIGN_OR_RAISE( - *out, DenseUnionType::Make(std::move(children), std::move(type_codes))); + ARROW_ASSIGN_OR_RAISE(*out, DenseUnionType::Make(children, std::move(type_codes))); } return Status::OK(); } @@ -478,7 +476,9 @@ static Status GetDictionaryEncoding(FBB& fbb, const std::shared_ptr& fiel static KeyValueOffset AppendKeyValue(FBB& fbb, const std::string& key, const std::string& value) { - return flatbuf::CreateKeyValue(fbb, fbb.CreateString(key), fbb.CreateString(value)); + auto fbb_key = fbb.CreateString(key); + auto fbb_value = fbb.CreateString(value); + return flatbuf::CreateKeyValue(fbb, fbb_key, fbb_value); } static void AppendKeyValueMetadata(FBB& fbb, const KeyValueMetadata& metadata, diff --git a/cpp/src/arrow/ipc/metadata_internal.h 
b/cpp/src/arrow/ipc/metadata_internal.h index 631a336f75a9a..c0aca44644a40 100644 --- a/cpp/src/arrow/ipc/metadata_internal.h +++ b/cpp/src/arrow/ipc/metadata_internal.h @@ -238,7 +238,8 @@ static inline Result> WriteFlatbufferBuilder( uint8_t* dst = result->mutable_data(); memcpy(dst, fbb.GetBufferPointer(), size); - return std::move(result); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(result)); } ARROW_EXPORT diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index d272c78560f82..da84f2f2dc87d 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -540,7 +540,8 @@ Result> DecompressBuffer(const std::shared_ptr& actual_decompressed); } - return std::move(uncompressed); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(uncompressed)); } Status DecompressBuffers(Compression::type compression, const IpcReadOptions& options, @@ -1174,7 +1175,7 @@ static Result> ReadMessageFromBlock( ARROW_ASSIGN_OR_RAISE(auto message, ReadMessage(block.offset, block.metadata_length, file, fields_loader)); - return std::move(message); + return message; } static Future> ReadMessageFromBlockAsync( @@ -1536,7 +1537,7 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader { ARROW_ASSIGN_OR_RAISE(auto message, arrow::ipc::ReadMessageFromBlock(block, file_, fields_loader)); stats_.num_messages.fetch_add(1, std::memory_order_relaxed); - return std::move(message); + return message; } Status ReadDictionaries() { @@ -1632,7 +1633,7 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader { } context.compression = compression; context.metadata_version = internal::GetMetadataVersion(message->version()); - return std::move(context); + return context; } Result GetBatchFromMessage( @@ -2704,7 +2705,7 @@ Result> IoRecordedRandomAccessFile::Read(int64_t nbytes) ARROW_ASSIGN_OR_RAISE(std::shared_ptr buffer, ReadAt(position_, nbytes)); auto num_bytes_read = std::min(file_size_, position_ + nbytes) - position_; position_ += num_bytes_read; - return std::move(buffer); + return buffer; } const io::IOContext& IoRecordedRandomAccessFile::io_context() const { diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index 93256440f4a7a..e3dd36efe0543 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -1561,7 +1561,8 @@ Result> OpenRecordBatchWriter( auto writer = std::make_unique( std::move(sink), schema, options, /*is_file_format=*/false); RETURN_NOT_OK(writer->Start()); - return std::move(writer); + // R build with openSUSE155 requires an explicit unique_ptr construction + return std::unique_ptr(std::move(writer)); } Result> MakePayloadStreamWriter( diff --git a/cpp/src/arrow/json/object_writer.h b/cpp/src/arrow/json/object_writer.h index b15b09dbdacfc..cf1ce62194fb8 100644 --- a/cpp/src/arrow/json/object_writer.h +++ b/cpp/src/arrow/json/object_writer.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include #include "arrow/util/visibility.h" diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc index 8521d500f5c05..e3a8c0d710cb8 100644 --- a/cpp/src/arrow/record_batch.cc +++ b/cpp/src/arrow/record_batch.cc @@ -35,7 +35,6 @@ #include "arrow/type.h" #include "arrow/util/iterator.h" #include "arrow/util/logging.h" -#include "arrow/util/unreachable.h" #include "arrow/util/vector.h" #include "arrow/visit_type_inline.h" @@ -59,17 +58,31 @@ int RecordBatch::num_columns() const { return 
schema_->num_fields(); } class SimpleRecordBatch : public RecordBatch { public: SimpleRecordBatch(std::shared_ptr schema, int64_t num_rows, - std::vector> columns) - : RecordBatch(std::move(schema), num_rows), boxed_columns_(std::move(columns)) { + std::vector> columns, + std::shared_ptr sync_event = nullptr) + : RecordBatch(std::move(schema), num_rows), + boxed_columns_(std::move(columns)), + device_type_(DeviceAllocationType::kCPU), + sync_event_(std::move(sync_event)) { + if (boxed_columns_.size() > 0) { + device_type_ = boxed_columns_[0]->device_type(); + } + columns_.resize(boxed_columns_.size()); for (size_t i = 0; i < columns_.size(); ++i) { columns_[i] = boxed_columns_[i]->data(); + DCHECK_EQ(device_type_, columns_[i]->device_type()); } } SimpleRecordBatch(const std::shared_ptr& schema, int64_t num_rows, - std::vector> columns) - : RecordBatch(std::move(schema), num_rows), columns_(std::move(columns)) { + std::vector> columns, + DeviceAllocationType device_type = DeviceAllocationType::kCPU, + std::shared_ptr sync_event = nullptr) + : RecordBatch(std::move(schema), num_rows), + columns_(std::move(columns)), + device_type_(device_type), + sync_event_(std::move(sync_event)) { boxed_columns_.resize(schema_->num_fields()); } @@ -99,6 +112,7 @@ class SimpleRecordBatch : public RecordBatch { const std::shared_ptr& column) const override { ARROW_CHECK(field != nullptr); ARROW_CHECK(column != nullptr); + ARROW_CHECK(column->device_type() == device_type_); if (!field->type()->Equals(column->type())) { return Status::TypeError("Column data type ", field->type()->name(), @@ -113,7 +127,8 @@ class SimpleRecordBatch : public RecordBatch { ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field)); return RecordBatch::Make(std::move(new_schema), num_rows_, - internal::AddVectorElement(columns_, i, column->data())); + internal::AddVectorElement(columns_, i, column->data()), + device_type_, sync_event_); } Result> SetColumn( @@ -121,6 +136,7 @@ class SimpleRecordBatch : public RecordBatch { const std::shared_ptr& column) const override { ARROW_CHECK(field != nullptr); ARROW_CHECK(column != nullptr); + ARROW_CHECK(column->device_type() == device_type_); if (!field->type()->Equals(column->type())) { return Status::TypeError("Column data type ", field->type()->name(), @@ -135,19 +151,22 @@ class SimpleRecordBatch : public RecordBatch { ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->SetField(i, field)); return RecordBatch::Make(std::move(new_schema), num_rows_, - internal::ReplaceVectorElement(columns_, i, column->data())); + internal::ReplaceVectorElement(columns_, i, column->data()), + device_type_, sync_event_); } Result> RemoveColumn(int i) const override { ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i)); return RecordBatch::Make(std::move(new_schema), num_rows_, - internal::DeleteVectorElement(columns_, i)); + internal::DeleteVectorElement(columns_, i), device_type_, + sync_event_); } std::shared_ptr ReplaceSchemaMetadata( const std::shared_ptr& metadata) const override { auto new_schema = schema_->WithMetadata(metadata); - return RecordBatch::Make(std::move(new_schema), num_rows_, columns_); + return RecordBatch::Make(std::move(new_schema), num_rows_, columns_, device_type_, + sync_event_); } std::shared_ptr Slice(int64_t offset, int64_t length) const override { @@ -157,7 +176,8 @@ class SimpleRecordBatch : public RecordBatch { arrays.emplace_back(field->Slice(offset, length)); } int64_t num_rows = std::min(num_rows_ - offset, length); - return std::make_shared(schema_, 
num_rows, std::move(arrays)); + return std::make_shared(schema_, num_rows, std::move(arrays), + device_type_, sync_event_); } Status Validate() const override { @@ -167,11 +187,22 @@ class SimpleRecordBatch : public RecordBatch { return RecordBatch::Validate(); } + const std::shared_ptr& GetSyncEvent() const override { + return sync_event_; + } + + DeviceAllocationType device_type() const override { return device_type_; } + private: std::vector> columns_; // Caching boxed array data mutable std::vector> boxed_columns_; + + // the type of device that the buffers for columns are allocated on. + // all columns should be on the same type of device. + DeviceAllocationType device_type_; + std::shared_ptr sync_event_; }; RecordBatch::RecordBatch(const std::shared_ptr& schema, int64_t num_rows) @@ -179,18 +210,21 @@ RecordBatch::RecordBatch(const std::shared_ptr& schema, int64_t num_rows std::shared_ptr RecordBatch::Make( std::shared_ptr schema, int64_t num_rows, - std::vector> columns) { + std::vector> columns, + std::shared_ptr sync_event) { DCHECK_EQ(schema->num_fields(), static_cast(columns.size())); return std::make_shared(std::move(schema), num_rows, - std::move(columns)); + std::move(columns), std::move(sync_event)); } std::shared_ptr RecordBatch::Make( std::shared_ptr schema, int64_t num_rows, - std::vector> columns) { + std::vector> columns, DeviceAllocationType device_type, + std::shared_ptr sync_event) { DCHECK_EQ(schema->num_fields(), static_cast(columns.size())); return std::make_shared(std::move(schema), num_rows, - std::move(columns)); + std::move(columns), device_type, + std::move(sync_event)); } Result> RecordBatch::MakeEmpty( @@ -251,204 +285,11 @@ Result> RecordBatch::ToStructArray() const { /*offset=*/0); } -template -struct ConvertColumnsToTensorVisitor { - Out*& out_values; - const ArrayData& in_data; - - template - Status Visit(const T&) { - if constexpr (is_numeric(T::type_id)) { - using In = typename T::c_type; - auto in_values = ArraySpan(in_data).GetSpan(1, in_data.length); - - if (in_data.null_count == 0) { - if constexpr (std::is_same_v) { - memcpy(out_values, in_values.data(), in_values.size_bytes()); - out_values += in_values.size(); - } else { - for (In in_value : in_values) { - *out_values++ = static_cast(in_value); - } - } - } else { - for (int64_t i = 0; i < in_data.length; ++i) { - *out_values++ = - in_data.IsNull(i) ? static_cast(NAN) : static_cast(in_values[i]); - } - } - return Status::OK(); - } - Unreachable(); - } -}; - -template -struct ConvertColumnsToTensorRowMajorVisitor { - Out*& out_values; - const ArrayData& in_data; - int num_cols; - int col_idx; - - template - Status Visit(const T&) { - if constexpr (is_numeric(T::type_id)) { - using In = typename T::c_type; - auto in_values = ArraySpan(in_data).GetSpan(1, in_data.length); - - if (in_data.null_count == 0) { - for (int64_t i = 0; i < in_data.length; ++i) { - out_values[i * num_cols + col_idx] = static_cast(in_values[i]); - } - } else { - for (int64_t i = 0; i < in_data.length; ++i) { - out_values[i * num_cols + col_idx] = - in_data.IsNull(i) ? 
static_cast(NAN) : static_cast(in_values[i]); - } - } - return Status::OK(); - } - Unreachable(); - } -}; - -template -inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out, - bool row_major) { - using CType = typename arrow::TypeTraits::CType; - auto* out_values = reinterpret_cast(out); - - int i = 0; - for (const auto& column : batch.columns()) { - if (row_major) { - ConvertColumnsToTensorRowMajorVisitor visitor{out_values, *column->data(), - batch.num_columns(), i++}; - DCHECK_OK(VisitTypeInline(*column->type(), &visitor)); - } else { - ConvertColumnsToTensorVisitor visitor{out_values, *column->data()}; - DCHECK_OK(VisitTypeInline(*column->type(), &visitor)); - } - } -} - Result> RecordBatch::ToTensor(bool null_to_nan, bool row_major, MemoryPool* pool) const { - if (num_columns() == 0) { - return Status::TypeError( - "Conversion to Tensor for RecordBatches without columns/schema is not " - "supported."); - } - // Check for no validity bitmap of each field - // if null_to_nan conversion is set to false - for (int i = 0; i < num_columns(); ++i) { - if (column(i)->null_count() > 0 && !null_to_nan) { - return Status::TypeError( - "Can only convert a RecordBatch with no nulls. Set null_to_nan to true to " - "convert nulls to NaN"); - } - } - - // Check for supported data types and merge fields - // to get the resulting uniform data type - if (!is_integer(column(0)->type()->id()) && !is_floating(column(0)->type()->id())) { - return Status::TypeError("DataType is not supported: ", - column(0)->type()->ToString()); - } - std::shared_ptr result_field = schema_->field(0); - std::shared_ptr result_type = result_field->type(); - - Field::MergeOptions options; - options.promote_integer_to_float = true; - options.promote_integer_sign = true; - options.promote_numeric_width = true; - - if (num_columns() > 1) { - for (int i = 1; i < num_columns(); ++i) { - if (!is_numeric(column(i)->type()->id())) { - return Status::TypeError("DataType is not supported: ", - column(i)->type()->ToString()); - } - - // Casting of float16 is not supported, throw an error in this case - if ((column(i)->type()->id() == Type::HALF_FLOAT || - result_field->type()->id() == Type::HALF_FLOAT) && - column(i)->type()->id() != result_field->type()->id()) { - return Status::NotImplemented("Casting from or to halffloat is not supported."); - } - - ARROW_ASSIGN_OR_RAISE( - result_field, result_field->MergeWith( - schema_->field(i)->WithName(result_field->name()), options)); - } - result_type = result_field->type(); - } - - // Check if result_type is signed or unsigned integer and null_to_nan is set to true - // Then all columns should be promoted to float type - if (is_integer(result_type->id()) && null_to_nan) { - ARROW_ASSIGN_OR_RAISE( - result_field, - result_field->MergeWith(field(result_field->name(), float32()), options)); - result_type = result_field->type(); - } - - // Allocate memory - ARROW_ASSIGN_OR_RAISE( - std::shared_ptr result, - AllocateBuffer(result_type->bit_width() * num_columns() * num_rows(), pool)); - // Copy data - switch (result_type->id()) { - case Type::UINT8: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::UINT16: - case Type::HALF_FLOAT: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::UINT32: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::UINT64: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::INT8: - 
ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::INT16: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::INT32: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::INT64: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::FLOAT: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::DOUBLE: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - default: - return Status::TypeError("DataType is not supported: ", result_type->ToString()); - } - - // Construct Tensor object - const auto& fixed_width_type = - internal::checked_cast(*result_type); - std::vector shape = {num_rows(), num_columns()}; - std::vector strides; std::shared_ptr tensor; - - if (row_major) { - ARROW_RETURN_NOT_OK( - internal::ComputeRowMajorStrides(fixed_width_type, shape, &strides)); - } else { - ARROW_RETURN_NOT_OK( - internal::ComputeColumnMajorStrides(fixed_width_type, shape, &strides)); - } - ARROW_ASSIGN_OR_RAISE(tensor, - Tensor::Make(result_type, std::move(result), shape, strides)); + ARROW_RETURN_NOT_OK( + internal::RecordBatchToTensor(*this, null_to_nan, row_major, pool, &tensor)); return tensor; } @@ -466,6 +307,10 @@ bool RecordBatch::Equals(const RecordBatch& other, bool check_metadata, return false; } + if (device_type() != other.device_type()) { + return false; + } + for (int i = 0; i < num_columns(); ++i) { if (!column(i)->Equals(other.column(i), opts)) { return false; @@ -480,6 +325,10 @@ bool RecordBatch::ApproxEquals(const RecordBatch& other, const EqualOptions& opt return false; } + if (device_type() != other.device_type()) { + return false; + } + for (int i = 0; i < num_columns(); ++i) { if (!column(i)->ApproxEquals(other.column(i), opts)) { return false; @@ -505,7 +354,7 @@ Result> RecordBatch::ReplaceSchema( ", did not match new schema field type: ", replace_type->ToString()); } } - return RecordBatch::Make(std::move(schema), num_rows(), columns()); + return RecordBatch::Make(std::move(schema), num_rows(), columns(), GetSyncEvent()); } std::vector RecordBatch::ColumnNames() const { @@ -534,7 +383,7 @@ Result> RecordBatch::RenameColumns( } return RecordBatch::Make(::arrow::schema(std::move(fields)), num_rows(), - std::move(columns)); + std::move(columns), GetSyncEvent()); } Result> RecordBatch::SelectColumns( @@ -555,7 +404,8 @@ Result> RecordBatch::SelectColumns( auto new_schema = std::make_shared(std::move(fields), schema()->metadata()); - return RecordBatch::Make(std::move(new_schema), num_rows(), std::move(columns)); + return RecordBatch::Make(std::move(new_schema), num_rows(), std::move(columns), + GetSyncEvent()); } std::shared_ptr RecordBatch::Slice(int64_t offset) const { @@ -647,12 +497,16 @@ Result> RecordBatchReader::ToTable() { class SimpleRecordBatchReader : public RecordBatchReader { public: SimpleRecordBatchReader(Iterator> it, - std::shared_ptr schema) - : schema_(std::move(schema)), it_(std::move(it)) {} + std::shared_ptr schema, + DeviceAllocationType device_type = DeviceAllocationType::kCPU) + : schema_(std::move(schema)), it_(std::move(it)), device_type_(device_type) {} SimpleRecordBatchReader(std::vector> batches, - std::shared_ptr schema) - : schema_(std::move(schema)), it_(MakeVectorIterator(std::move(batches))) {} + std::shared_ptr schema, + DeviceAllocationType device_type = DeviceAllocationType::kCPU) + : schema_(std::move(schema)), + 
it_(MakeVectorIterator(std::move(batches))), + device_type_(device_type) {} Status ReadNext(std::shared_ptr* batch) override { return it_.Next().Value(batch); @@ -660,13 +514,17 @@ class SimpleRecordBatchReader : public RecordBatchReader { std::shared_ptr schema() const override { return schema_; } + DeviceAllocationType device_type() const override { return device_type_; } + protected: std::shared_ptr schema_; Iterator> it_; + DeviceAllocationType device_type_; }; Result> RecordBatchReader::Make( - std::vector> batches, std::shared_ptr schema) { + std::vector> batches, std::shared_ptr schema, + DeviceAllocationType device_type) { if (schema == nullptr) { if (batches.size() == 0 || batches[0] == nullptr) { return Status::Invalid("Cannot infer schema from empty vector or nullptr"); @@ -675,16 +533,19 @@ Result> RecordBatchReader::Make( schema = batches[0]->schema(); } - return std::make_shared(std::move(batches), std::move(schema)); + return std::make_shared(std::move(batches), std::move(schema), + device_type); } Result> RecordBatchReader::MakeFromIterator( - Iterator> batches, std::shared_ptr schema) { + Iterator> batches, std::shared_ptr schema, + DeviceAllocationType device_type) { if (schema == nullptr) { return Status::Invalid("Schema cannot be nullptr"); } - return std::make_shared(std::move(batches), std::move(schema)); + return std::make_shared(std::move(batches), std::move(schema), + device_type); } RecordBatchReader::~RecordBatchReader() { @@ -701,6 +562,10 @@ Result> ConcatenateRecordBatches( int cols = batches[0]->num_columns(); auto schema = batches[0]->schema(); for (size_t i = 0; i < batches.size(); ++i) { + if (auto sync = batches[i]->GetSyncEvent()) { + ARROW_RETURN_NOT_OK(sync->Wait()); + } + length += batches[i]->num_rows(); if (!schema->Equals(batches[i]->schema())) { return Status::Invalid( diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h index cd647a88abd97..b03cbf2251f47 100644 --- a/cpp/src/arrow/record_batch.h +++ b/cpp/src/arrow/record_batch.h @@ -23,6 +23,7 @@ #include #include "arrow/compare.h" +#include "arrow/device.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" @@ -45,9 +46,12 @@ class ARROW_EXPORT RecordBatch { /// \param[in] num_rows length of fields in the record batch. Each array /// should have the same length as num_rows /// \param[in] columns the record batch fields as vector of arrays - static std::shared_ptr Make(std::shared_ptr schema, - int64_t num_rows, - std::vector> columns); + /// \param[in] sync_event optional synchronization event for non-CPU device + /// memory used by buffers + static std::shared_ptr Make( + std::shared_ptr schema, int64_t num_rows, + std::vector> columns, + std::shared_ptr sync_event = NULLPTR); /// \brief Construct record batch from vector of internal data structures /// \since 0.5.0 @@ -58,9 +62,15 @@ class ARROW_EXPORT RecordBatch { /// \param num_rows the number of semantic rows in the record batch. 
This /// should be equal to the length of each field /// \param columns the data for the batch's columns + /// \param device_type the type of the device that the Arrow columns are + /// allocated on + /// \param sync_event optional synchronization event for non-CPU device + /// memory used by buffers static std::shared_ptr Make( std::shared_ptr schema, int64_t num_rows, - std::vector> columns); + std::vector> columns, + DeviceAllocationType device_type = DeviceAllocationType::kCPU, + std::shared_ptr sync_event = NULLPTR); /// \brief Create an empty RecordBatch of a given schema /// @@ -260,6 +270,18 @@ class ARROW_EXPORT RecordBatch { /// \return Status virtual Status ValidateFull() const; + /// \brief EXPERIMENTAL: Return a top-level sync event object for this record batch + /// + /// If all of the data for this record batch is in CPU memory, then this + /// will return null. If the data for this batch is + /// on a device, then if synchronization is needed before accessing the + /// data the returned sync event will allow for it. + /// + /// \return null or a Device::SyncEvent + virtual const std::shared_ptr& GetSyncEvent() const = 0; + + virtual DeviceAllocationType device_type() const = 0; + protected: RecordBatch(const std::shared_ptr& schema, int64_t num_rows); @@ -306,6 +328,11 @@ class ARROW_EXPORT RecordBatchReader { /// \brief finalize reader virtual Status Close() { return Status::OK(); } + /// \brief EXPERIMENTAL: Get the device type for record batches this reader produces + /// + /// default implementation is to return DeviceAllocationType::kCPU + virtual DeviceAllocationType device_type() const { return DeviceAllocationType::kCPU; } + class RecordBatchReaderIterator { public: using iterator_category = std::input_iterator_tag; @@ -379,15 +406,19 @@ class ARROW_EXPORT RecordBatchReader { /// \param[in] batches the vector of RecordBatch to read from /// \param[in] schema schema to conform to. Will be inferred from the first /// element if not provided. + /// \param[in] device_type the type of device that the batches are allocated on static Result> Make( - RecordBatchVector batches, std::shared_ptr schema = NULLPTR); + RecordBatchVector batches, std::shared_ptr schema = NULLPTR, + DeviceAllocationType device_type = DeviceAllocationType::kCPU); /// \brief Create a RecordBatchReader from an Iterator of RecordBatch. /// /// \param[in] batches an iterator of RecordBatch to read from. /// \param[in] schema schema that each record batch in iterator will conform to. + /// \param[in] device_type the type of device that the batches are allocated on static Result> MakeFromIterator( - Iterator> batches, std::shared_ptr schema); + Iterator> batches, std::shared_ptr schema, + DeviceAllocationType device_type = DeviceAllocationType::kCPU); }; /// \brief Concatenate record batches diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc index 8e8d3903663e4..252706fd0b387 100644 --- a/cpp/src/arrow/scalar.cc +++ b/cpp/src/arrow/scalar.cc @@ -563,15 +563,17 @@ Status Scalar::ValidateFull() const { BaseBinaryScalar::BaseBinaryScalar(std::string s, std::shared_ptr type) : BaseBinaryScalar(Buffer::FromString(std::move(s)), std::move(type)) {} -void BinaryScalar::FillScratchSpace() { +void BinaryScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { FillScalarScratchSpace( - scratch_space_, + scratch_space, {int32_t(0), value ? 
static_cast(value->size()) : int32_t(0)}); } -void BinaryViewScalar::FillScratchSpace() { +void BinaryViewScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { static_assert(sizeof(BinaryViewType::c_type) <= internal::kScalarScratchSpaceSize); - auto* view = new (&scratch_space_) BinaryViewType::c_type; + auto* view = new (scratch_space) BinaryViewType::c_type; if (value) { *view = util::ToBinaryView(std::string_view{*value}, 0, 0); } else { @@ -579,9 +581,10 @@ void BinaryViewScalar::FillScratchSpace() { } } -void LargeBinaryScalar::FillScratchSpace() { +void LargeBinaryScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { FillScalarScratchSpace( - scratch_space_, + scratch_space, {int64_t(0), value ? static_cast(value->size()) : int64_t(0)}); } @@ -612,36 +615,40 @@ BaseListScalar::BaseListScalar(std::shared_ptr value, } ListScalar::ListScalar(std::shared_ptr value, bool is_valid) - : BaseListScalar(value, list(value->type()), is_valid) {} + : ListScalar(value, list(value->type()), is_valid) {} -void ListScalar::FillScratchSpace() { +void ListScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { FillScalarScratchSpace( - scratch_space_, + scratch_space, {int32_t(0), value ? static_cast(value->length()) : int32_t(0)}); } LargeListScalar::LargeListScalar(std::shared_ptr value, bool is_valid) - : BaseListScalar(value, large_list(value->type()), is_valid) {} + : LargeListScalar(value, large_list(value->type()), is_valid) {} -void LargeListScalar::FillScratchSpace() { - FillScalarScratchSpace(scratch_space_, +void LargeListScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { + FillScalarScratchSpace(scratch_space, {int64_t(0), value ? value->length() : int64_t(0)}); } ListViewScalar::ListViewScalar(std::shared_ptr value, bool is_valid) - : BaseListScalar(value, list_view(value->type()), is_valid) {} + : ListViewScalar(value, list_view(value->type()), is_valid) {} -void ListViewScalar::FillScratchSpace() { +void ListViewScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { FillScalarScratchSpace( - scratch_space_, + scratch_space, {int32_t(0), value ? static_cast(value->length()) : int32_t(0)}); } LargeListViewScalar::LargeListViewScalar(std::shared_ptr value, bool is_valid) - : BaseListScalar(value, large_list_view(value->type()), is_valid) {} + : LargeListViewScalar(value, large_list_view(value->type()), is_valid) {} -void LargeListViewScalar::FillScratchSpace() { - FillScalarScratchSpace(scratch_space_, +void LargeListViewScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { + FillScalarScratchSpace(scratch_space, {int64_t(0), value ? value->length() : int64_t(0)}); } @@ -652,11 +659,12 @@ inline std::shared_ptr MakeMapType(const std::shared_ptr& pa } MapScalar::MapScalar(std::shared_ptr value, bool is_valid) - : BaseListScalar(value, MakeMapType(value->type()), is_valid) {} + : MapScalar(value, MakeMapType(value->type()), is_valid) {} -void MapScalar::FillScratchSpace() { +void MapScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { FillScalarScratchSpace( - scratch_space_, + scratch_space, {int32_t(0), value ? 
static_cast(value->length()) : int32_t(0)}); } @@ -705,7 +713,9 @@ Result> StructScalar::field(FieldRef ref) const { RunEndEncodedScalar::RunEndEncodedScalar(std::shared_ptr value, std::shared_ptr type) - : Scalar{std::move(type), value->is_valid}, value{std::move(value)} { + : Scalar{std::move(type), value->is_valid}, + ArraySpanFillFromScalarScratchSpace(*this->type), + value{std::move(value)} { ARROW_CHECK_EQ(this->type->id(), Type::RUN_END_ENCODED); } @@ -716,18 +726,18 @@ RunEndEncodedScalar::RunEndEncodedScalar(const std::shared_ptr& type) RunEndEncodedScalar::~RunEndEncodedScalar() = default; -void RunEndEncodedScalar::FillScratchSpace() { - auto run_end = run_end_type()->id(); +void RunEndEncodedScalar::FillScratchSpace(uint8_t* scratch_space, const DataType& type) { + Type::type run_end = checked_cast(type).run_end_type()->id(); switch (run_end) { case Type::INT16: - FillScalarScratchSpace(scratch_space_, {int16_t(1)}); + FillScalarScratchSpace(scratch_space, {int16_t(1)}); break; case Type::INT32: - FillScalarScratchSpace(scratch_space_, {int32_t(1)}); + FillScalarScratchSpace(scratch_space, {int32_t(1)}); break; default: DCHECK_EQ(run_end, Type::INT64); - FillScalarScratchSpace(scratch_space_, {int64_t(1)}); + FillScalarScratchSpace(scratch_space, {int64_t(1)}); } } @@ -806,6 +816,7 @@ Result TimestampScalar::FromISO8601(std::string_view iso8601, SparseUnionScalar::SparseUnionScalar(ValueType value, int8_t type_code, std::shared_ptr type) : UnionScalar(std::move(type), type_code, /*is_valid=*/true), + ArraySpanFillFromScalarScratchSpace(type_code), value(std::move(value)) { const auto child_ids = checked_cast(*this->type).child_ids(); if (type_code >= 0 && static_cast(type_code) < child_ids.size() && @@ -833,13 +844,13 @@ std::shared_ptr SparseUnionScalar::FromValue(std::shared_ptr val return std::make_shared(field_values, type_code, std::move(type)); } -void SparseUnionScalar::FillScratchSpace() { - auto* union_scratch_space = reinterpret_cast(&scratch_space_); +void SparseUnionScalar::FillScratchSpace(uint8_t* scratch_space, int8_t type_code) { + auto* union_scratch_space = reinterpret_cast(scratch_space); union_scratch_space->type_code = type_code; } -void DenseUnionScalar::FillScratchSpace() { - auto* union_scratch_space = reinterpret_cast(&scratch_space_); +void DenseUnionScalar::FillScratchSpace(uint8_t* scratch_space, int8_t type_code) { + auto* union_scratch_space = reinterpret_cast(scratch_space); union_scratch_space->type_code = type_code; FillScalarScratchSpace(union_scratch_space->offsets, {int32_t(0), int32_t(1)}); } @@ -1212,7 +1223,7 @@ Result> CastImpl(const StringScalar& from, ARROW_ASSIGN_OR_RAISE(auto out, Scalar::Parse(std::move(to_type), std::string_view(*from.value))); DCHECK(checked_pointer_cast(out) != nullptr); - return std::move(out); + return out; } // binary/large binary/large string to string @@ -1336,7 +1347,7 @@ struct FromTypeVisitor : CastImplVisitor { ARROW_ASSIGN_OR_RAISE( out_, CastImpl( checked_cast::ScalarType&>(from_), - std::move(to_type_))); + to_type_)); return Status::OK(); } @@ -1344,8 +1355,8 @@ struct FromTypeVisitor : CastImplVisitor { template typename std::enable_if_t::is_parameter_free, Status> Visit( const ToType&) { - ARROW_ASSIGN_OR_RAISE(out_, MakeScalar(std::move(to_type_), - checked_cast(from_).value)); + ARROW_ASSIGN_OR_RAISE( + out_, MakeScalar(to_type_, checked_cast(from_).value)); return Status::OK(); } diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h index a7ee6a417d9a1..982a4c5113c92 100644 --- 
a/cpp/src/arrow/scalar.h +++ b/cpp/src/arrow/scalar.h @@ -141,7 +141,12 @@ struct ARROW_EXPORT ArraySpanFillFromScalarScratchSpace { alignas(int64_t) mutable uint8_t scratch_space_[kScalarScratchSpaceSize]; private: - ArraySpanFillFromScalarScratchSpace() { static_cast(this)->FillScratchSpace(); } + template + explicit ArraySpanFillFromScalarScratchSpace(Args&&... args) { + Impl::FillScratchSpace(scratch_space_, std::forward(args)...); + } + + ArraySpanFillFromScalarScratchSpace() = delete; friend Impl; }; @@ -278,20 +283,32 @@ struct ARROW_EXPORT BaseBinaryScalar : public internal::PrimitiveScalarBase { struct ARROW_EXPORT BinaryScalar : public BaseBinaryScalar, private internal::ArraySpanFillFromScalarScratchSpace { - using BaseBinaryScalar::BaseBinaryScalar; using TypeClass = BinaryType; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + explicit BinaryScalar(std::shared_ptr type) + : BaseBinaryScalar(std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + + BinaryScalar(std::shared_ptr value, std::shared_ptr type) + : BaseBinaryScalar(std::move(value), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + + BinaryScalar(std::string s, std::shared_ptr type) + : BaseBinaryScalar(std::move(s), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit BinaryScalar(std::shared_ptr value) : BinaryScalar(std::move(value), binary()) {} - explicit BinaryScalar(std::string s) : BaseBinaryScalar(std::move(s), binary()) {} + explicit BinaryScalar(std::string s) : BinaryScalar(std::move(s), binary()) {} BinaryScalar() : BinaryScalar(binary()) {} private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -312,23 +329,35 @@ struct ARROW_EXPORT StringScalar : public BinaryScalar { struct ARROW_EXPORT BinaryViewScalar : public BaseBinaryScalar, private internal::ArraySpanFillFromScalarScratchSpace { - using BaseBinaryScalar::BaseBinaryScalar; using TypeClass = BinaryViewType; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + explicit BinaryViewScalar(std::shared_ptr type) + : BaseBinaryScalar(std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + + BinaryViewScalar(std::shared_ptr value, std::shared_ptr type) + : BaseBinaryScalar(std::move(value), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + + BinaryViewScalar(std::string s, std::shared_ptr type) + : BaseBinaryScalar(std::move(s), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit BinaryViewScalar(std::shared_ptr value) : BinaryViewScalar(std::move(value), binary_view()) {} explicit BinaryViewScalar(std::string s) - : BaseBinaryScalar(std::move(s), binary_view()) {} + : BinaryViewScalar(std::move(s), binary_view()) {} BinaryViewScalar() : BinaryViewScalar(binary_view()) {} std::string_view view() const override { return std::string_view(*this->value); } private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -350,24 +379,33 @@ struct ARROW_EXPORT StringViewScalar : public BinaryViewScalar { struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar, private internal::ArraySpanFillFromScalarScratchSpace { - using BaseBinaryScalar::BaseBinaryScalar; using TypeClass = 
LargeBinaryType; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + explicit LargeBinaryScalar(std::shared_ptr type) + : BaseBinaryScalar(std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + LargeBinaryScalar(std::shared_ptr value, std::shared_ptr type) - : BaseBinaryScalar(std::move(value), std::move(type)) {} + : BaseBinaryScalar(std::move(value), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + + LargeBinaryScalar(std::string s, std::shared_ptr type) + : BaseBinaryScalar(std::move(s), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} explicit LargeBinaryScalar(std::shared_ptr value) : LargeBinaryScalar(std::move(value), large_binary()) {} explicit LargeBinaryScalar(std::string s) - : BaseBinaryScalar(std::move(s), large_binary()) {} + : LargeBinaryScalar(std::move(s), large_binary()) {} LargeBinaryScalar() : LargeBinaryScalar(large_binary()) {} private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -550,14 +588,19 @@ struct ARROW_EXPORT ListScalar : public BaseListScalar, private internal::ArraySpanFillFromScalarScratchSpace { using TypeClass = ListType; - using BaseListScalar::BaseListScalar; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + ListScalar(std::shared_ptr value, std::shared_ptr type, + bool is_valid = true) + : BaseListScalar(std::move(value), std::move(type), is_valid), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit ListScalar(std::shared_ptr value, bool is_valid = true); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -567,14 +610,19 @@ struct ARROW_EXPORT LargeListScalar : public BaseListScalar, private internal::ArraySpanFillFromScalarScratchSpace { using TypeClass = LargeListType; - using BaseListScalar::BaseListScalar; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + LargeListScalar(std::shared_ptr value, std::shared_ptr type, + bool is_valid = true) + : BaseListScalar(std::move(value), std::move(type), is_valid), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit LargeListScalar(std::shared_ptr value, bool is_valid = true); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -584,14 +632,19 @@ struct ARROW_EXPORT ListViewScalar : public BaseListScalar, private internal::ArraySpanFillFromScalarScratchSpace { using TypeClass = ListViewType; - using BaseListScalar::BaseListScalar; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + ListViewScalar(std::shared_ptr value, std::shared_ptr type, + bool is_valid = true) + : BaseListScalar(std::move(value), std::move(type), is_valid), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit ListViewScalar(std::shared_ptr value, bool is_valid = true); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -601,14 +654,19 @@ struct ARROW_EXPORT LargeListViewScalar : public BaseListScalar, private 
internal::ArraySpanFillFromScalarScratchSpace { using TypeClass = LargeListViewType; - using BaseListScalar::BaseListScalar; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + LargeListViewScalar(std::shared_ptr value, std::shared_ptr type, + bool is_valid = true) + : BaseListScalar(std::move(value), std::move(type), is_valid), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit LargeListViewScalar(std::shared_ptr value, bool is_valid = true); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -618,14 +676,19 @@ struct ARROW_EXPORT MapScalar : public BaseListScalar, private internal::ArraySpanFillFromScalarScratchSpace { using TypeClass = MapType; - using BaseListScalar::BaseListScalar; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + MapScalar(std::shared_ptr value, std::shared_ptr type, + bool is_valid = true) + : BaseListScalar(std::move(value), std::move(type), is_valid), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit MapScalar(std::shared_ptr value, bool is_valid = true); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -707,7 +770,7 @@ struct ARROW_EXPORT SparseUnionScalar std::shared_ptr type); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, int8_t type_code); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -733,10 +796,11 @@ struct ARROW_EXPORT DenseUnionScalar DenseUnionScalar(ValueType value, int8_t type_code, std::shared_ptr type) : UnionScalar(std::move(type), type_code, value->is_valid), + ArraySpanFillFromScalarScratchSpace(type_code), value(std::move(value)) {} private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, int8_t type_code); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -772,7 +836,7 @@ struct ARROW_EXPORT RunEndEncodedScalar private: const TypeClass& ree_type() const { return internal::checked_cast(*type); } - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, const DataType& type); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc index 967e78f6b4db1..5dc5e4c1a9a8c 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -619,6 +619,7 @@ TableBatchReader::TableBatchReader(const Table& table) for (int i = 0; i < table.num_columns(); ++i) { column_data_[i] = table.column(i).get(); } + DCHECK(table_.Validate().ok()); } TableBatchReader::TableBatchReader(std::shared_ptr
<Table> table) @@ -632,6 +633,7 @@ TableBatchReader::TableBatchReader(std::shared_ptr<Table>
table) for (int i = 0; i < owned_table_->num_columns(); ++i) { column_data_[i] = owned_table_->column(i).get(); } + DCHECK(table_.Validate().ok()); } std::shared_ptr TableBatchReader::schema() const { return table_.schema(); } diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h index a7508430c132b..79675fa92b1f3 100644 --- a/cpp/src/arrow/table.h +++ b/cpp/src/arrow/table.h @@ -241,6 +241,8 @@ class ARROW_EXPORT Table { /// /// The conversion is zero-copy: each record batch is a view over a slice /// of the table's columns. +/// +/// The table is expected to be valid prior to using it with the batch reader. class ARROW_EXPORT TableBatchReader : public RecordBatchReader { public: /// \brief Construct a TableBatchReader for the given table diff --git a/cpp/src/arrow/table_builder.cc b/cpp/src/arrow/table_builder.cc index 19ca151ac200f..8dc2efd19d90d 100644 --- a/cpp/src/arrow/table_builder.cc +++ b/cpp/src/arrow/table_builder.cc @@ -47,7 +47,7 @@ Result> RecordBatchBuilder::Make( new RecordBatchBuilder(schema, pool, initial_capacity)); RETURN_NOT_OK(builder->CreateBuilders()); RETURN_NOT_OK(builder->InitBuilders()); - return std::move(builder); + return builder; } Result> RecordBatchBuilder::Flush(bool reset_builders) { diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index 77ccedbde15c6..b47f1a1075b37 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -18,6 +18,7 @@ #include "arrow/tensor.h" #include +#include #include #include #include @@ -27,12 +28,14 @@ #include #include +#include "arrow/record_batch.h" #include "arrow/status.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" +#include "arrow/util/unreachable.h" #include "arrow/visit_type_inline.h" namespace arrow { @@ -220,6 +223,209 @@ Status ValidateTensorParameters(const std::shared_ptr& type, return Status::OK(); } +template +struct ConvertColumnsToTensorVisitor { + Out*& out_values; + const ArrayData& in_data; + + template + Status Visit(const T&) { + if constexpr (is_numeric(T::type_id)) { + using In = typename T::c_type; + auto in_values = ArraySpan(in_data).GetSpan(1, in_data.length); + + if (in_data.null_count == 0) { + if constexpr (std::is_same_v) { + memcpy(out_values, in_values.data(), in_values.size_bytes()); + out_values += in_values.size(); + } else { + for (In in_value : in_values) { + *out_values++ = static_cast(in_value); + } + } + } else { + for (int64_t i = 0; i < in_data.length; ++i) { + *out_values++ = + in_data.IsNull(i) ? static_cast(NAN) : static_cast(in_values[i]); + } + } + return Status::OK(); + } + Unreachable(); + } +}; + +template +struct ConvertColumnsToTensorRowMajorVisitor { + Out*& out_values; + const ArrayData& in_data; + int num_cols; + int col_idx; + + template + Status Visit(const T&) { + if constexpr (is_numeric(T::type_id)) { + using In = typename T::c_type; + auto in_values = ArraySpan(in_data).GetSpan(1, in_data.length); + + if (in_data.null_count == 0) { + for (int64_t i = 0; i < in_data.length; ++i) { + out_values[i * num_cols + col_idx] = static_cast(in_values[i]); + } + } else { + for (int64_t i = 0; i < in_data.length; ++i) { + out_values[i * num_cols + col_idx] = + in_data.IsNull(i) ? 
static_cast(NAN) : static_cast(in_values[i]); + } + } + return Status::OK(); + } + Unreachable(); + } +}; + +template +inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out, + bool row_major) { + using CType = typename arrow::TypeTraits::CType; + auto* out_values = reinterpret_cast(out); + + int i = 0; + for (const auto& column : batch.columns()) { + if (row_major) { + ConvertColumnsToTensorRowMajorVisitor visitor{out_values, *column->data(), + batch.num_columns(), i++}; + DCHECK_OK(VisitTypeInline(*column->type(), &visitor)); + } else { + ConvertColumnsToTensorVisitor visitor{out_values, *column->data()}; + DCHECK_OK(VisitTypeInline(*column->type(), &visitor)); + } + } +} + +Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major, + MemoryPool* pool, std::shared_ptr* tensor) { + if (batch.num_columns() == 0) { + return Status::TypeError( + "Conversion to Tensor for RecordBatches without columns/schema is not " + "supported."); + } + // Check for no validity bitmap of each field + // if null_to_nan conversion is set to false + for (int i = 0; i < batch.num_columns(); ++i) { + if (batch.column(i)->null_count() > 0 && !null_to_nan) { + return Status::TypeError( + "Can only convert a RecordBatch with no nulls. Set null_to_nan to true to " + "convert nulls to NaN"); + } + } + + // Check for supported data types and merge fields + // to get the resulting uniform data type + if (!is_integer(batch.column(0)->type()->id()) && + !is_floating(batch.column(0)->type()->id())) { + return Status::TypeError("DataType is not supported: ", + batch.column(0)->type()->ToString()); + } + std::shared_ptr result_field = batch.schema()->field(0); + std::shared_ptr result_type = result_field->type(); + + Field::MergeOptions options; + options.promote_integer_to_float = true; + options.promote_integer_sign = true; + options.promote_numeric_width = true; + + if (batch.num_columns() > 1) { + for (int i = 1; i < batch.num_columns(); ++i) { + if (!is_numeric(batch.column(i)->type()->id())) { + return Status::TypeError("DataType is not supported: ", + batch.column(i)->type()->ToString()); + } + + // Casting of float16 is not supported, throw an error in this case + if ((batch.column(i)->type()->id() == Type::HALF_FLOAT || + result_field->type()->id() == Type::HALF_FLOAT) && + batch.column(i)->type()->id() != result_field->type()->id()) { + return Status::NotImplemented("Casting from or to halffloat is not supported."); + } + + ARROW_ASSIGN_OR_RAISE( + result_field, + result_field->MergeWith( + batch.schema()->field(i)->WithName(result_field->name()), options)); + } + result_type = result_field->type(); + } + + // Check if result_type is signed or unsigned integer and null_to_nan is set to true + // Then all columns should be promoted to float type + if (is_integer(result_type->id()) && null_to_nan) { + ARROW_ASSIGN_OR_RAISE( + result_field, + result_field->MergeWith(field(result_field->name(), float32()), options)); + result_type = result_field->type(); + } + + // Allocate memory + ARROW_ASSIGN_OR_RAISE( + std::shared_ptr result, + AllocateBuffer(result_type->bit_width() * batch.num_columns() * batch.num_rows(), + pool)); + // Copy data + switch (result_type->id()) { + case Type::UINT8: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::UINT16: + case Type::HALF_FLOAT: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::UINT32: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); 
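+      // Layout sketch (illustrative, with hypothetical columns a and b over
+      // 3 rows): row_major=true interleaves columns, writing each value at
+      // index i * num_cols + col to give {a0, b0, a1, b1, a2, b2}, while
+      // row_major=false writes each column contiguously, giving
+      // {a0, a1, a2, b0, b1, b2}. The strides computed after this switch
+      // match the chosen layout, e.g. {8, 4} vs. {4, 12} bytes for a
+      // 3-row, 2-column batch of int32.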
+ break; + case Type::UINT64: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::INT8: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::INT16: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::INT32: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::INT64: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::FLOAT: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::DOUBLE: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + default: + return Status::TypeError("DataType is not supported: ", result_type->ToString()); + } + + // Construct Tensor object + const auto& fixed_width_type = + internal::checked_cast(*result_type); + std::vector shape = {batch.num_rows(), batch.num_columns()}; + std::vector strides; + + if (row_major) { + ARROW_RETURN_NOT_OK( + internal::ComputeRowMajorStrides(fixed_width_type, shape, &strides)); + } else { + ARROW_RETURN_NOT_OK( + internal::ComputeColumnMajorStrides(fixed_width_type, shape, &strides)); + } + ARROW_ASSIGN_OR_RAISE(*tensor, + Tensor::Make(result_type, std::move(result), shape, strides)); + return Status::OK(); +} + } // namespace internal /// Constructor with strides and dimension names diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index ff6f3735f9193..dd3a21fae495a 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -77,6 +77,10 @@ Status ValidateTensorParameters(const std::shared_ptr& type, const std::vector& strides, const std::vector& dim_names); +ARROW_EXPORT +Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major, + MemoryPool* pool, std::shared_ptr* tensor); + } // namespace internal class ARROW_EXPORT Tensor { diff --git a/cpp/src/arrow/testing/builder.h b/cpp/src/arrow/testing/builder.h index 09e8f49dea9eb..6beb7760e3bbf 100644 --- a/cpp/src/arrow/testing/builder.h +++ b/cpp/src/arrow/testing/builder.h @@ -195,7 +195,7 @@ Result> ArrayFromBuilderVisitor( std::shared_ptr out; RETURN_NOT_OK(builder->Finish(&out)); - return std::move(out); + return out; } template diff --git a/cpp/src/arrow/testing/extension_type.h b/cpp/src/arrow/testing/extension_type.h index 846e3c7a16578..6515631f202ae 100644 --- a/cpp/src/arrow/testing/extension_type.h +++ b/cpp/src/arrow/testing/extension_type.h @@ -132,6 +132,25 @@ class ARROW_TESTING_EXPORT DictExtensionType : public ExtensionType { std::string Serialize() const override { return "dict-extension-serialized"; } }; +// A minimal extension type that does not error when passed blank extension information +class ARROW_TESTING_EXPORT MetadataOptionalExtensionType : public ExtensionType { + public: + MetadataOptionalExtensionType() : ExtensionType(null()) {} + std::string extension_name() const override { return "metadata.optional"; } + std::string Serialize() const override { return ""; } + std::shared_ptr MakeArray(std::shared_ptr data) const override { + return nullptr; + } + bool ExtensionEquals(const ExtensionType& other) const override { + return other.extension_name() == extension_name(); + } + Result> Deserialize( + std::shared_ptr storage_type, + const std::string& serialized_data) const override { + return std::make_shared(); + } +}; + class ARROW_TESTING_EXPORT Complex128Array : public ExtensionArray { public: using ExtensionArray::ExtensionArray; diff --git 
a/cpp/src/arrow/testing/fixed_width_test_util.cc b/cpp/src/arrow/testing/fixed_width_test_util.cc new file mode 100644 index 0000000000000..9c305ed1df97c --- /dev/null +++ b/cpp/src/arrow/testing/fixed_width_test_util.cc @@ -0,0 +1,181 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include + +#include "arrow/array/builder_base.h" +#include "arrow/array/builder_nested.h" +#include "arrow/array/builder_primitive.h" +#include "arrow/testing/fixed_width_test_util.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" + +namespace arrow::util::internal { + +namespace { +template +inline Status AppendNumeric(ArrayBuilder* builder, int64_t* next_value) { + using NumericBuilder = ::arrow::NumericBuilder; + using value_type = typename NumericBuilder::value_type; + auto* numeric_builder = ::arrow::internal::checked_cast(builder); + auto cast_next_value = + static_cast(*next_value % std::numeric_limits::max()); + RETURN_NOT_OK(numeric_builder->Append(cast_next_value)); + *next_value += 1; + return Status::OK(); +} +} // namespace + +std::shared_ptr NestedListGenerator::NestedFSLType( + const std::shared_ptr& inner_type, const std::vector& sizes) { + auto type = inner_type; + for (auto it = sizes.rbegin(); it != sizes.rend(); it++) { + type = fixed_size_list(type, *it); + } + return type; +} + +std::shared_ptr NestedListGenerator::NestedListType( + const std::shared_ptr& inner_type, size_t depth) { + auto list_type = list(inner_type); + for (size_t i = 1; i < depth; i++) { + list_type = list(std::move(list_type)); + } + return list_type; +} + +Result> NestedListGenerator::NestedFSLArray( + const std::shared_ptr& inner_type, const std::vector& list_sizes, + int64_t length) { + auto nested_type = NestedFSLType(inner_type, list_sizes); + ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type)); + return NestedListArray(builder.get(), list_sizes, length); +} + +Result> NestedListGenerator::NestedListArray( + const std::shared_ptr& inner_type, const std::vector& list_sizes, + int64_t length) { + auto nested_type = NestedListType(inner_type, list_sizes.size()); + ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type)); + return NestedListArray(builder.get(), list_sizes, length); +} + +void NestedListGenerator::VisitAllNestedListConfigurations( + const std::vector>& inner_value_types, + const std::function&, const std::vector&)>& + visit, + int max_depth, int max_power_of_2_size) { + for (int depth = 1; depth <= max_depth; depth++) { + for (auto& type : inner_value_types) { + assert(is_fixed_width(*type)); + int value_width = type->byte_width(); + + std::vector list_sizes; // stack of list sizes + auto pop = [&]() { // pop the list_sizes stack + assert(!list_sizes.empty()); + value_width /= 
list_sizes.back(); + list_sizes.pop_back(); + }; + auto next = [&]() { // double the top of the stack + assert(!list_sizes.empty()); + value_width *= 2; + list_sizes.back() *= 2; + return value_width; + }; + auto push_1s = [&]() { // fill the stack with 1s + while (list_sizes.size() < static_cast(depth)) { + list_sizes.push_back(1); + } + }; + + // Loop invariants: + // value_width == product(list_sizes) * type->byte_width() + // value_width is a power-of-2 (1, 2, 4, 8, 16, max_power_of_2_size=32) + push_1s(); + do { + // for (auto x : list_sizes) printf("%d * ", x); + // printf("(%s) %d = %2d\n", type->name().c_str(), type->byte_width(), + // value_width); + visit(type, list_sizes); + while (!list_sizes.empty()) { + if (next() <= max_power_of_2_size) { + push_1s(); + break; + } + pop(); + } + } while (!list_sizes.empty()); + } + } +} + +Status NestedListGenerator::AppendNestedList(ArrayBuilder* nested_builder, + const int* list_sizes, + int64_t* next_inner_value) { + using ::arrow::internal::checked_cast; + ArrayBuilder* builder = nested_builder; + auto type = builder->type(); + if (type->id() == Type::FIXED_SIZE_LIST || type->id() == Type::LIST) { + const int list_size = *list_sizes; + if (type->id() == Type::FIXED_SIZE_LIST) { + auto* fsl_builder = checked_cast(builder); + assert(list_size == checked_cast(*type).list_size()); + RETURN_NOT_OK(fsl_builder->Append()); + builder = fsl_builder->value_builder(); + } else { // type->id() == Type::LIST) + auto* list_builder = checked_cast(builder); + RETURN_NOT_OK(list_builder->Append(/*is_valid=*/true, list_size)); + builder = list_builder->value_builder(); + } + list_sizes++; + for (int i = 0; i < list_size; i++) { + RETURN_NOT_OK(AppendNestedList(builder, list_sizes, next_inner_value)); + } + } else { + switch (type->id()) { + case Type::INT8: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + case Type::INT16: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + case Type::INT32: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + case Type::INT64: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + default: + return Status::NotImplemented("Unsupported type: ", *type); + } + } + return Status::OK(); +} + +Result> NestedListGenerator::NestedListArray( + ArrayBuilder* nested_builder, const std::vector& list_sizes, int64_t length) { + int64_t next_inner_value = 0; + for (int64_t i = 0; i < length; i++) { + RETURN_NOT_OK(AppendNestedList(nested_builder, list_sizes.data(), &next_inner_value)); + } + return nested_builder->Finish(); +} + +} // namespace arrow::util::internal diff --git a/cpp/src/arrow/testing/fixed_width_test_util.h b/cpp/src/arrow/testing/fixed_width_test_util.h new file mode 100644 index 0000000000000..9e5e6fa68509e --- /dev/null +++ b/cpp/src/arrow/testing/fixed_width_test_util.h @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
diff --git a/cpp/src/arrow/testing/fixed_width_test_util.h b/cpp/src/arrow/testing/fixed_width_test_util.h
new file mode 100644
index 0000000000000..9e5e6fa68509e
--- /dev/null
+++ b/cpp/src/arrow/testing/fixed_width_test_util.h
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "arrow/testing/visibility.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+
+namespace arrow::util::internal {
+
+class ARROW_TESTING_EXPORT NestedListGenerator {
+ public:
+  /// \brief Create a nested FixedSizeListType.
+  ///
+  /// \return `fixed_size_list(fixed_size_list(..., sizes[1]), sizes[0])`
+  static std::shared_ptr<DataType> NestedFSLType(
+      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& sizes);
+
+  /// \brief Create a nested ListType.
+  ///
+  /// \return `list(list(...))`
+  static std::shared_ptr<DataType> NestedListType(
+      const std::shared_ptr<DataType>& inner_type, size_t depth);
+
+  static Result<std::shared_ptr<Array>> NestedFSLArray(
+      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& list_sizes,
+      int64_t length);
+
+  static Result<std::shared_ptr<Array>> NestedListArray(
+      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& list_sizes,
+      int64_t length);
+
+  /// \brief Generate all possible nested list configurations of depth 1 to max_depth.
+  ///
+  /// Each configuration consists of a single inner value type and a list of sizes.
+  /// Both can be used with NestedFSLArray and NestedListArray to generate test data.
+  ///
+  /// The product of the list sizes and the size of the inner value type is always a
+  /// power of 2 no greater than max_power_of_2_size. For max_depth=3 and
+  /// max_power_of_2_size=32, this generates 108 configurations.
+  static void VisitAllNestedListConfigurations(
+      const std::vector<std::shared_ptr<DataType>>& inner_value_types,
+      const std::function<void(const std::shared_ptr<DataType>&,
+                               const std::vector<int>&)>& visit,
+      int max_depth = 3, int max_power_of_2_size = 32);
+
+ private:
+  // Append([...[[*next_inner_value++, *next_inner_value++, ...]]...])
+  static Status AppendNestedList(ArrayBuilder* nested_builder, const int* list_sizes,
+                                 int64_t* next_inner_value);
+
+  static Result<std::shared_ptr<Array>> NestedListArray(
+      ArrayBuilder* nested_builder, const std::vector<int>& list_sizes, int64_t length);
+};
+
+}  // namespace arrow::util::internal
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index edf8f0496628c..8ce03a91c70ae 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -729,7 +729,7 @@ Result<std::shared_ptr<DataType>> MaybeMergeListTypes(
         auto item_field, left.item_field()->MergeWith(
                              *right.item_field()->WithName(left.item_field()->name()),
                              options));
-    return map(std::move(key_field->type()), std::move(item_field),
+    return map(key_field->type(), std::move(item_field),
                /*keys_sorted=*/left.keys_sorted() && right.keys_sorted());
   } else if (promoted_type->id() == Type::STRUCT && other_type->id() == Type::STRUCT) {
     return MergeStructs(promoted_type, other_type, options);
@@ -1696,7 +1696,7 @@ class NestedSelector {
       }
     }
 
-    return std::move(child_data);
+    return child_data;
   }
 
   static Result<std::shared_ptr<Array>> GetChild(const Array& array, int i,
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 58c9df04ec5c3..bb05e6efdb987 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -1723,7 +1723,7 @@ class ARROW_EXPORT MonthIntervalType : public IntervalType {
 
   MonthIntervalType() : IntervalType(type_id) {}
 
-  std::string ToString([[maybe_unused]] bool show_metadata = false) const override {
+  std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override {
     return name();
   }
   std::string name() const override { return "month_interval"; }
@@ -1761,7 +1761,7 @@ class ARROW_EXPORT DayTimeIntervalType : public IntervalType {
 
   int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
 
-  std::string ToString([[maybe_unused]] bool show_metadata = false) const override {
+  std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override {
     return name();
   }
   std::string name() const override { return "day_time_interval"; }
@@ -1803,7 +1803,7 @@ class ARROW_EXPORT MonthDayNanoIntervalType : public IntervalType {
 
   int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
 
-  std::string ToString([[maybe_unused]] bool show_metadata = false) const override {
+  std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override {
     return name();
   }
   std::string name() const override { return "month_day_nano_interval"; }
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index e26efba28594b..087e4e3879e56 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -56,6 +56,7 @@ add_arrow_test(utility-test
                compression_test.cc
                decimal_test.cc
                float16_test.cc
+               fixed_width_test.cc
                formatting_util_test.cc
                key_value_metadata_test.cc
                hashing_test.cc
diff --git a/cpp/src/arrow/util/align_util.cc b/cpp/src/arrow/util/align_util.cc
index 7bc687b155052..a327afa7a5cc3 100644
--- a/cpp/src/arrow/util/align_util.cc
+++ b/cpp/src/arrow/util/align_util.cc
@@ -159,9 +159,10 @@ Result<std::shared_ptr<Buffer>> EnsureAlignment(std::shared_ptr<Buffer> buffer,
         auto new_buffer,
         AllocateBuffer(buffer->size(), minimum_desired_alignment, memory_pool));
     std::memcpy(new_buffer->mutable_data(), buffer->data(),
buffer->size()); - return std::move(new_buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(new_buffer)); } else { - return std::move(buffer); + return buffer; } } @@ -197,9 +198,9 @@ Result> EnsureAlignment(std::shared_ptr ar auto new_array_data = ArrayData::Make( array_data->type, array_data->length, std::move(buffers), array_data->child_data, array_data->dictionary, array_data->GetNullCount(), array_data->offset); - return std::move(new_array_data); + return new_array_data; } else { - return std::move(array_data); + return array_data; } } @@ -210,7 +211,7 @@ Result> EnsureAlignment(std::shared_ptr array, EnsureAlignment(array->data(), alignment, memory_pool)); if (new_array_data.get() == array->data().get()) { - return std::move(array); + return array; } else { return MakeArray(std::move(new_array_data)); } @@ -230,7 +231,7 @@ Result> EnsureAlignment(std::shared_ptrtype()); } else { - return std::move(array); + return array; } } @@ -248,7 +249,7 @@ Result> EnsureAlignment(std::shared_ptrschema(), batch->num_rows(), std::move(columns_)); } else { - return std::move(batch); + return batch; } } @@ -275,7 +276,7 @@ Result> EnsureAlignment(std::shared_ptr
table, } return Table::Make(table->schema(), std::move(columns_), table->num_rows()); } else { - return std::move(table); + return table; } } diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h index f9bcd534567c6..fd66298d1a9d6 100644 --- a/cpp/src/arrow/util/async_generator.h +++ b/cpp/src/arrow/util/async_generator.h @@ -1962,7 +1962,7 @@ AsyncGenerator MakeFailingGenerator(Status st) { return [state]() -> Future { auto st = std::move(*state); if (!st.ok()) { - return std::move(st); + return st; } else { return AsyncGeneratorEnd(); } diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 0bf2c26f12486..43f3fb33cd7fd 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -107,7 +107,7 @@ static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { auto buffer = *AllocateBuffer(nbytes); memset(buffer->mutable_data(), 0, nbytes); random_bytes(nbytes, /*seed=*/0, buffer->mutable_data()); - return std::move(buffer); + return buffer; } static std::shared_ptr CreateRandomBitsBuffer(int64_t nbits, diff --git a/cpp/src/arrow/util/bitmap_builders.cc b/cpp/src/arrow/util/bitmap_builders.cc index 0348b1303b96b..c5cf3d2bc72b5 100644 --- a/cpp/src/arrow/util/bitmap_builders.cc +++ b/cpp/src/arrow/util/bitmap_builders.cc @@ -51,7 +51,8 @@ Result> BytesToBits(const std::vector& bytes, uint8_t* out_buf = buffer->mutable_data(); memset(out_buf, 0, static_cast(buffer->capacity())); FillBitsFromBytes(bytes, out_buf); - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } Result> BitmapAllButOne(MemoryPool* pool, int64_t length, @@ -66,7 +67,8 @@ Result> BitmapAllButOne(MemoryPool* pool, int64_t length auto bitmap_data = buffer->mutable_data(); bit_util::SetBitsTo(bitmap_data, 0, length, value); bit_util::SetBitTo(bitmap_data, straggler_pos, !value); - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } } // namespace internal diff --git a/cpp/src/arrow/util/bitmap_reader_benchmark.cc b/cpp/src/arrow/util/bitmap_reader_benchmark.cc index 1427adb13e131..b3c199ec3bd55 100644 --- a/cpp/src/arrow/util/bitmap_reader_benchmark.cc +++ b/cpp/src/arrow/util/bitmap_reader_benchmark.cc @@ -45,7 +45,7 @@ static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { auto buffer = *AllocateBuffer(nbytes); memset(buffer->mutable_data(), 0, nbytes); random_bytes(nbytes, /*seed=*/0, buffer->mutable_data()); - return std::move(buffer); + return buffer; } static void BitBlockCounterBench(benchmark::State& state) { diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc index b63aec0aae8f9..7e2a3de30306a 100644 --- a/cpp/src/arrow/util/compression.cc +++ b/cpp/src/arrow/util/compression.cc @@ -216,7 +216,7 @@ Result> Codec::Create(Compression::type codec_type, DCHECK_NE(codec, nullptr); RETURN_NOT_OK(codec->Init()); - return std::move(codec); + return codec; } // use compression level to create Codec diff --git a/cpp/src/arrow/util/config.h.cmake b/cpp/src/arrow/util/config.h.cmake index 9fbd685084fd5..08c2ae173601b 100644 --- a/cpp/src/arrow/util/config.h.cmake +++ b/cpp/src/arrow/util/config.h.cmake @@ -31,9 +31,6 @@ #define ARROW_BUILD_TYPE "@UPPERCASE_BUILD_TYPE@" -#define ARROW_GIT_ID "@ARROW_GIT_ID@" -#define ARROW_GIT_DESCRIPTION "@ARROW_GIT_DESCRIPTION@" - #define 
ARROW_PACKAGE_KIND "@ARROW_PACKAGE_KIND@" #cmakedefine ARROW_COMPUTE diff --git a/cpp/src/arrow/util/config_internal.h.cmake b/cpp/src/arrow/util/config_internal.h.cmake new file mode 100644 index 0000000000000..e90f7ee12da4d --- /dev/null +++ b/cpp/src/arrow/util/config_internal.h.cmake @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// These variables are not exposed as they can make compilation caching +// and increment builds less efficient. + +#define ARROW_GIT_ID "@ARROW_GIT_ID@" +#define ARROW_GIT_DESCRIPTION "@ARROW_GIT_DESCRIPTION@" diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index ce71def497161..c8457eae8ed33 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -717,7 +717,7 @@ Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision Result Decimal128::FromString(std::string_view s) { Decimal128 out; RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr)); - return std::move(out); + return out; } Result Decimal128::FromString(const std::string& s) { @@ -850,7 +850,7 @@ Status Decimal256::FromString(const char* s, Decimal256* out, int32_t* precision Result Decimal256::FromString(std::string_view s) { Decimal256 out; RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr)); - return std::move(out); + return out; } Result Decimal256::FromString(const std::string& s) { diff --git a/cpp/src/arrow/util/fixed_width_internal.cc b/cpp/src/arrow/util/fixed_width_internal.cc new file mode 100644 index 0000000000000..3f12fafb54f0f --- /dev/null +++ b/cpp/src/arrow/util/fixed_width_internal.cc @@ -0,0 +1,232 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
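The new fixed_width_internal.cc below is the core of this change. As orientation before the implementation, a typical caller is expected to combine the entry points roughly as follows. This is a hedged sketch, not part of the diff, and only uses declarations introduced in fixed_width_internal.h further down:

#include <cstdint>

#include "arrow/array/data.h"
#include "arrow/util/fixed_width_internal.h"

// Returns the byte width of the flattened fixed-width values of `source`,
// or -1 when `source` is not fixed-width like (or is BOOL, which has no
// whole-byte width).
int64_t FlatByteWidth(const arrow::ArraySpan& source) {
  if (!arrow::util::IsFixedWidthLike(source)) {
    return -1;
  }
  return arrow::util::FixedWidthInBytes(*source.type);
}

// With the width known, a non-BOOL reader can then locate the first relevant
// byte of the (possibly nested) values buffer:
const uint8_t* FirstValueByte(const arrow::ArraySpan& source) {
  return arrow::util::OffsetPointerOfFixedByteWidthValues(source);
}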
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "arrow/array/data.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/result.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/fixed_width_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/small_vector.h"
+
+namespace arrow::util {
+
+using ::arrow::internal::checked_cast;
+
+bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count,
+                      bool exclude_bool_and_dictionary) {
+  return IsFixedWidthLike(
+      source, force_null_count, [exclude_bool_and_dictionary](const DataType& type) {
+        return !exclude_bool_and_dictionary ||
+               (type.id() != Type::DICTIONARY && type.id() != Type::BOOL);
+      });
+}
+
+static int64_t FixedWidthInBytesFallback(const FixedSizeListType& fixed_size_list_type) {
+  auto* fsl = &fixed_size_list_type;
+  int64_t list_size = fsl->list_size();
+  for (auto type = fsl->value_type().get();;) {
+    if (type->id() == Type::FIXED_SIZE_LIST) {
+      fsl = checked_cast<const FixedSizeListType*>(type);
+      list_size *= fsl->list_size();
+      type = fsl->value_type().get();
+      continue;
+    }
+    if (type->id() != Type::BOOL && is_fixed_width(type->id())) {
+      const int64_t flat_byte_width = list_size * type->byte_width();
+      DCHECK_GE(flat_byte_width, 0);
+      return flat_byte_width;
+    }
+    break;
+  }
+  return -1;
+}
+
+int64_t FixedWidthInBytes(const DataType& type) {
+  auto type_id = type.id();
+  if (is_fixed_width(type_id)) {
+    const int32_t num_bits = type.bit_width();
+    return (type_id == Type::BOOL) ? -1 : num_bits / 8;
+  }
+  if (type_id == Type::FIXED_SIZE_LIST) {
+    auto& fsl = ::arrow::internal::checked_cast<const FixedSizeListType&>(type);
+    return FixedWidthInBytesFallback(fsl);
+  }
+  return -1;
+}
+
+static int64_t FixedWidthInBitsFallback(const FixedSizeListType& fixed_size_list_type) {
+  auto* fsl = &fixed_size_list_type;
+  int64_t list_size = fsl->list_size();
+  for (auto type = fsl->value_type().get();;) {
+    auto type_id = type->id();
+    if (type_id == Type::FIXED_SIZE_LIST) {
+      fsl = checked_cast<const FixedSizeListType*>(type);
+      list_size *= fsl->list_size();
+      type = fsl->value_type().get();
+      continue;
+    }
+    if (is_fixed_width(type_id)) {
+      const int64_t flat_bit_width = list_size * type->bit_width();
+      DCHECK_GE(flat_bit_width, 0);
+      return flat_bit_width;
+    }
+    break;
+  }
+  return -1;
+}
+
+int64_t FixedWidthInBits(const DataType& type) {
+  auto type_id = type.id();
+  if (is_fixed_width(type_id)) {
+    return type.bit_width();
+  }
+  if (type_id == Type::FIXED_SIZE_LIST) {
+    auto& fsl = ::arrow::internal::checked_cast<const FixedSizeListType&>(type);
+    return FixedWidthInBitsFallback(fsl);
+  }
+  return -1;
+}
+
+namespace internal {
+
+Status PreallocateFixedWidthArrayData(::arrow::compute::KernelContext* ctx,
+                                      int64_t length, const ArraySpan& source,
+                                      bool allocate_validity, ArrayData* out) {
+  DCHECK(!source.MayHaveNulls() || allocate_validity)
+      << "allocate_validity cannot be false if source may have nulls";
+  DCHECK_EQ(source.type->id(), out->type->id());
+  auto* type = source.type;
+  out->length = length;
+  if (type->id() == Type::FIXED_SIZE_LIST) {
+    out->buffers.resize(1);
+    out->child_data = {std::make_shared<ArrayData>()};
+  } else {
+    out->buffers.resize(2);
+  }
+  if (allocate_validity) {
+    ARROW_ASSIGN_OR_RAISE(out->buffers[0], ctx->AllocateBitmap(length));
+  }
+
+  if (type->id() == Type::BOOL) {
+    ARROW_ASSIGN_OR_RAISE(out->buffers[1], ctx->AllocateBitmap(length));
+    return Status::OK();
+  }
+  if (is_fixed_width(type->id())) {
+    if (type->id() == Type::DICTIONARY) {
+      return Status::NotImplemented(
+          "PreallocateFixedWidthArrayData: DICTIONARY type allocation: ", *type);
+    }
+    ARROW_ASSIGN_OR_RAISE(out->buffers[1],
+                          ctx->Allocate(length * source.type->byte_width()));
+    return Status::OK();
+  }
+  if (type->id() == Type::FIXED_SIZE_LIST) {
+    auto& fsl_type = checked_cast<const FixedSizeListType&>(*type);
+    auto& value_type = fsl_type.value_type();
+    if (ARROW_PREDICT_FALSE(value_type->id() == Type::DICTIONARY)) {
+      return Status::NotImplemented(
+          "PreallocateFixedWidthArrayData: DICTIONARY type allocation: ", *type);
+    }
+    if (source.child_data[0].MayHaveNulls()) {
+      return Status::Invalid(
+          "PreallocateFixedWidthArrayData: "
+          "FixedSizeList may have null values in child array: ",
+          fsl_type);
+    }
+    auto* child_values = out->child_data[0].get();
+    child_values->type = value_type;
+    return PreallocateFixedWidthArrayData(ctx, length * fsl_type.list_size(),
+                                          /*source=*/source.child_data[0],
+                                          /*allocate_validity=*/false,
+                                          /*out=*/child_values);
+  }
+  return Status::Invalid("PreallocateFixedWidthArrayData: Invalid type: ", *type);
+}
+
+}  // namespace internal
+
+std::pair<int, const uint8_t*> OffsetPointerOfFixedBitWidthValues(
+    const ArraySpan& source) {
+  using OffsetAndListSize = std::pair<int64_t, int64_t>;
+  auto get_offset = [](auto pair) { return pair.first; };
+  auto get_list_size = [](auto pair) { return pair.second; };
+  ::arrow::internal::SmallVector<OffsetAndListSize, 1> stack;
+
+  int64_t list_size = 1;
+  auto* array = &source;
+  while (array->type->id() == Type::FIXED_SIZE_LIST) {
+    list_size *= checked_cast<const FixedSizeListType*>(array->type)->list_size();
+    stack.emplace_back(array->offset, list_size);
+    array = &array->child_data[0];
+  }
+  // Now that innermost values were reached, pop the stack and calculate the offset
+  // in bytes of the innermost values buffer by considering the offset at each
+  // level of nesting.
+  DCHECK(is_fixed_width(*array->type));
+  DCHECK(array == &source || !array->MayHaveNulls())
+      << "OffsetPointerOfFixedBitWidthValues: array is expected to be flat or have no "
+         "nulls in the arrays nested by FIXED_SIZE_LIST.";
+  int64_t value_width_in_bits = array->type->bit_width();
+  int64_t offset_in_bits = array->offset * value_width_in_bits;
+  for (auto it = stack.rbegin(); it != stack.rend(); ++it) {
+    value_width_in_bits *= get_list_size(*it);
+    offset_in_bits += get_offset(*it) * value_width_in_bits;
+  }
+  DCHECK_GE(value_width_in_bits, 0);
+  const auto* values_ptr = array->GetValues<uint8_t>(1, 0);
+  return {static_cast<int>(offset_in_bits % 8), values_ptr + (offset_in_bits / 8)};
+}
+
+const uint8_t* OffsetPointerOfFixedByteWidthValues(const ArraySpan& source) {
+  DCHECK(IsFixedWidthLike(source, /*force_null_count=*/false,
+                          [](const DataType& type) { return type.id() != Type::BOOL; }));
+  return OffsetPointerOfFixedBitWidthValues(source).second;
+}
+
+/// \brief Get the mutable pointer to the fixed-width values of an array
+/// allocated by PreallocateFixedWidthArrayData.
+///
+/// \pre mutable_array->offset and the offset of child array (if it's a
+///      FixedSizeList) MUST be 0 (recursively).
+/// \pre IsFixedWidthLike(ArraySpan(mutable_array)) or the more restrictive
+///      is_fixed_width(*mutable_array->type) MUST be true
+/// \return The mutable pointer to the fixed-width byte blocks of the array. If
+///         pre-conditions are not satisfied, the return value is undefined.
+uint8_t* MutableFixedWidthValuesPointer(ArrayData* mutable_array) {
+  auto* array = mutable_array;
+  auto type_id = array->type->id();
+  while (type_id == Type::FIXED_SIZE_LIST) {
+    DCHECK_EQ(array->offset, 0);
+    DCHECK_EQ(array->child_data.size(), 1) << array->type->ToString(true) << " part of "
+                                           << mutable_array->type->ToString(true);
+    array = array->child_data[0].get();
+    type_id = array->type->id();
+  }
+  DCHECK_EQ(mutable_array->offset, 0);
+  // BOOL is allowed here only because the offset is expected to be 0,
+  // so the byte-aligned pointer also points to the first *bit* of the buffer.
+  DCHECK(is_fixed_width(type_id));
+  return array->GetMutableValues<uint8_t>(1, 0);
+}
+
+}  // namespace arrow::util
diff --git a/cpp/src/arrow/util/fixed_width_internal.h b/cpp/src/arrow/util/fixed_width_internal.h
new file mode 100644
index 0000000000000..232411f4c4a56
--- /dev/null
+++ b/cpp/src/arrow/util/fixed_width_internal.h
@@ -0,0 +1,309 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <utility>
+
+#include "arrow/array/data.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/type_traits.h"
+
+namespace arrow::compute {
+// XXX: remove dependency on compute::KernelContext
+class KernelContext;
+}  // namespace arrow::compute
+
+namespace arrow::util {
+
+/// \brief Checks if the given array has a fixed-width type or if it's an array of
+/// fixed-size list that can be flattened to an array of fixed-width values.
+///
+/// Fixed-width types are the ones defined by the is_fixed_width() predicate in
+/// type_traits.h. They are all the types that pass any of the following
+/// predicates:
+///
+/// - is_primitive()
+/// - is_fixed_size_binary()
+/// - is_dictionary()
+///
+/// At least 3 types in this set require special care:
+/// - `Type::BOOL` is fixed-width, but it's a 1-bit type and pointers to the first bit
+///   in boolean buffers are not always aligned to byte boundaries.
+/// - `Type::DICTIONARY` is fixed-width because the indices are fixed-width, but the
+///   dictionary values are not necessarily fixed-width and have to be managed
+///   by separate operations.
+/// - `Type::FIXED_SIZE_BINARY`: unlike other fixed-width types, fixed-size binary
+///   values are defined by a size attribute that is not known at compile time.
+///   The other types have power-of-2 byte widths, while fixed-size binary
+///   can have any byte width, including 0.
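The three caveats just listed are easy to check against the public type factories. A small illustrative snippet (not part of the diff; it assumes only arrow/type.h and arrow/type_traits.h):

#include <cassert>

#include "arrow/type.h"
#include "arrow/type_traits.h"

void FixedWidthCaveats() {
  // All three pass the generic is_fixed_width() predicate...
  assert(arrow::is_fixed_width(arrow::Type::BOOL));
  assert(arrow::is_fixed_width(arrow::Type::DICTIONARY));
  assert(arrow::is_fixed_width(arrow::Type::FIXED_SIZE_BINARY));
  // ...but their widths need care: BOOL is a single bit, DICTIONARY reports
  // the width of its indices, and FIXED_SIZE_BINARY can be any size, even 0.
  assert(arrow::boolean()->bit_width() == 1);
  assert(arrow::dictionary(arrow::int32(), arrow::utf8())->bit_width() == 32);
  assert(arrow::fixed_size_binary(0)->byte_width() == 0);
}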
+///
+/// Additionally, we say that a type is "fixed-width like" if it's fixed-width as
+/// defined above, or if it's a fixed-size list (or nested fixed-size lists) whose
+/// innermost type is fixed-width, with one additional restriction:
+/// - Only the top-level array may have nulls; all the inner arrays have to be
+///   completely free of nulls so we don't need to manage internal validity bitmaps.
+///
+/// \param source The array to check
+/// \param force_null_count If true, GetNullCount() is used instead of null_count
+/// \param exclude_bool_and_dictionary If true, BOOL and DICTIONARY are excluded from
+///        the is_fixed_width() types. Default: false.
+ARROW_EXPORT bool IsFixedWidthLike(const ArraySpan& source,
+                                   bool force_null_count = false,
+                                   bool exclude_bool_and_dictionary = false);
+
+// Take the following `fixed_size_list<fixed_size_list<int32, 2>, 3>` array as an
+// example:
+//
+//   [
+//     [[1, 2], [3, 4], [ 5,  6]],
+//     null,
+//     [[7, 8], [9, 10], [11, 12]]
+//   ]
+//
+// in memory, it would look like:
+//
+//   {
+//     type: fixed_size_list<fixed_size_list<int32, 2>, 3>,
+//     length: 3,
+//     null_count: 1,
+//     offset: 0,
+//     buffers: [
+//       0: [0b00000101]
+//     ],
+//     child_data: [
+//       0: {
+//         type: fixed_size_list<int32, 2>,
+//         length: 9,
+//         null_count: 0,
+//         offset: 0,
+//         buffers: [0: NULL],
+//         child_data: [
+//           0: {
+//             type: int32,
+//             length: 18,
+//             null_count: 0,
+//             offset: 0,
+//             buffers: [
+//               0: NULL,
+//               1: [ 1, 2,  3,  4,  5,  6,
+//                    0, 0,  0,  0,  0,  0,
+//                    7, 8,  9, 10, 11, 12 ]
+//             ],
+//             child_data: []
+//           }
+//         ]
+//       }
+//     ]
+//   }
+//
+// This layout fits the fixed-width like definition because the innermost type
+// is byte-aligned fixed-width (int32 = 4 bytes) and the internal arrays don't
+// have nulls. The validity bitmap is only needed at the top-level array.
+//
+// Writing to this array can be done in the same way writing to a flat fixed-width
+// array is done, by:
+// 1. Updating the validity bitmap at the top-level array if nulls are present.
+// 2. Updating a continuous fixed-width block of memory through a single pointer.
+//
+// The length of this block of memory is the product of the list sizes in the
+// `FixedSizeList` types and the byte width of the innermost fixed-width type:
+//
+//   3 * 2 * 4 = 24 bytes
+//
+// Writing the `[[1, 2], [3, 4], [5, 6]]` value at a given index can be done by
+// simply setting the validity bit to 1 and writing the 24-byte sequence of
+// integers `[1, 2, 3, 4, 5, 6]` to the memory block at `byte_ptr + index * 24`.
+//
+// The length of the top-level array fully defines the lengths that all the nested
+// arrays must have, which makes defining all the lengths as easy as defining the
+// length of the top-level array.
+//
+//   length = 3
+//   child_data[0].length == 3 * 3 == 9
+//   child_data[0].child_data[0].length == 3 * 3 * 2 == 18
+//
+//   child_data[0].child_data[0].buffers[1].size() >=
+//     (3 * (3 * 2 * sizeof(int32)) == 3 * 24 == 72)
+//
+// Dealing with offsets is a bit involved. Let's say the array described above has
+// the offsets 2, 5, and 7:
+//
+//   {
+//     type: fixed_size_list<fixed_size_list<int32, 2>, 3>,
+//     offset: 2,
+//     ...
+//     child_data: [
+//       0: {
+//         type: fixed_size_list<int32, 2>,
+//         offset: 5,
+//         ...
+//         child_data: [
+//           0: {
+//             type: int32,
+//             offset: 7,
+//             buffers: [
+//               0: NULL,
+//               1: [ 1, 1, 1, 1, 1, 1, 1,      // 7 values skipped
+//                    0,1, 0,1, 0,1, 0,1, 0,1,  // 5 [x,x] values skipped
+//
+//                    0,0,0,0,0,1,              //
+//                    0,0,0,0,0,1,              // 2 [[x,x], [x,x], [x,x]] values skipped
+//
+//                    1, 2,  3,  4,  5,  6,     //
+//                    0, 0,  0,  0,  0,  0,     // the actual values
+//                    7, 8,  9, 10, 11, 12      //
+//                  ]
+//             ]
+//           }
+//         ]
+//       }
+//     ]
+//   }
+//
+// The offset of the innermost values buffer, in bytes, is calculated as:
+//
+//   ((2 * 3 * 2) + (5 * 2) + 7) * sizeof(int32) = 29 * 4 bytes = 116 bytes
+//
+// In general, each level's offset is multiplied by the number of innermost values
+// per element at that level of nesting:
+//
+//   ((off_0 * fsl_size_0 * fsl_size_1 * ...) + (off_1 * fsl_size_1 * ...) + ...
+//     + innermost_off) * sizeof(innermost_type)
+//
+// `OffsetPointerOfFixedByteWidthValues()` can calculate this byte offset and return
+// the pointer to the first relevant byte of the innermost values buffer.
+
+/// \brief Checks if the given array has a fixed-width type or if it's an array of
+/// fixed-size list that can be flattened to an array of fixed-width values.
+///
+/// \param source The array to check
+/// \param force_null_count If true, GetNullCount() is used instead of null_count
+/// \param extra_predicate A DataType predicate that can be used to further
+///        restrict the types that are considered fixed-width
+template <typename ExtraPred>
+inline bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count,
+                             ExtraPred extra_predicate) {
+  const auto* type = source.type;
+  // BOOL is considered fixed-width if not nested under FIXED_SIZE_LIST.
+  if (is_fixed_width(type->id()) && extra_predicate(*type)) {
+    return true;
+  }
+  if (type->id() == Type::FIXED_SIZE_LIST) {
+    // All the inner arrays must not contain any nulls.
+    const auto* values = &source.child_data[0];
+    while ((force_null_count ? values->GetNullCount() : values->null_count) == 0) {
+      type = values->type;
+      if (type->id() == Type::FIXED_SIZE_LIST) {
+        values = &values->child_data[0];
+        continue;
+      }
+      return is_fixed_width(type->id()) && extra_predicate(*type);
+    }
+  }
+  return false;
+}
+
+/// \brief Get the fixed-width in bytes of a type if it is a fixed-width like
+/// type, but not BOOL.
+///
+/// If the array is a FixedSizeList (of any level of nesting), the byte width of
+/// the values is the product of all fixed-list sizes and the byte width of the
+/// innermost fixed-width value type.
+///
+/// IsFixedWidthLike(array) performs more checks than this function and should
+/// be used to guarantee that, if type is not BOOL, this function will not return -1.
+///
+/// NOTE: this function translates `DataType::bit_width()` to bytes differently from
+/// `DataType::byte_width()`. `DataType::byte_width()` will return 0 for
+/// BOOL, while this function will return `-1`. This is done because 0 is
+/// a valid return value for FIXED_SIZE_LIST with size 0 or `FIXED_SIZE_BINARY` with
+/// size 0.
+///
+/// \pre The instance of the array where this type is from must pass
+///      `IsFixedWidthLike(array)` and should not be BOOL.
+/// \return The fixed-byte width of the values or -1 if the type is BOOL or not
+///         fixed-width like. 0 is a valid return value as fixed-size-lists
+///         and fixed-size-binary with size 0 are allowed.
+ARROW_EXPORT int64_t FixedWidthInBytes(const DataType& type);
+
+/// \brief Get the fixed-width in bits of a type if it is a fixed-width like
+/// type.
+///
+/// If the array is a FixedSizeList (of any level of nesting), the bit width of
+/// the values is the product of all fixed-list sizes and the bit width of the
+/// innermost fixed-width value type.
+///
+/// \return The bit-width of the values or -1
+/// \see FixedWidthInBytes
+ARROW_EXPORT int64_t FixedWidthInBits(const DataType& type);
+
+namespace internal {
+
+/// \brief Allocate an ArrayData for a type that is fixed-width like.
+///
+/// This function performs the same checks performed by
+/// `IsFixedWidthLike(source, false, false)`. If `source.type` is not a simple
+/// fixed-width type, the caller should make sure it passes the
+/// `IsFixedWidthLike(source)` checks. That guarantees that it's possible to
+/// allocate an array that can serve as a destination for a kernel that writes values
+/// through a single pointer to fixed-width byte blocks.
+///
+/// \param[in] length The length of the array to allocate (unrelated to the length of
+///                   the source array)
+/// \param[in] source The source array that carries the type information and the
+///                   validity bitmaps that are relevant for the type validation
+///                   when the source is a FixedSizeList.
+/// \see IsFixedWidthLike
+ARROW_EXPORT Status PreallocateFixedWidthArrayData(::arrow::compute::KernelContext* ctx,
+                                                   int64_t length,
+                                                   const ArraySpan& source,
+                                                   bool allocate_validity,
+                                                   ArrayData* out);
+
+}  // namespace internal
+
+/// \brief Get the 0-7 residual offset in bits and the pointer to the fixed-width
+/// values of a fixed-width like array.
+///
+/// For byte-aligned types, the offset is always 0.
+///
+/// \pre `IsFixedWidthLike(source)` or the more restrictive
+///      is_fixed_width(*source.type) SHOULD be true
+/// \return A pair with the residual offset in bits (0-7) and the pointer
+///         to the fixed-width values.
+ARROW_EXPORT std::pair<int, const uint8_t*> OffsetPointerOfFixedBitWidthValues(
+    const ArraySpan& source);
+
+/// \brief Get the pointer to the fixed-width values of a fixed-width like array.
+///
+/// \pre `IsFixedWidthLike(source)` should be true and BOOL should be excluded
+///      as each bool is 1-bit width making it impossible to produce a
+///      byte-aligned pointer to the values in the general case.
+ARROW_EXPORT const uint8_t* OffsetPointerOfFixedByteWidthValues(const ArraySpan& source);
+
+/// \brief Get the mutable pointer to the fixed-width values of an array
+/// allocated by PreallocateFixedWidthArrayData.
+///
+/// \pre mutable_array->offset and the offset of child array (if it's a
+///      FixedSizeList) MUST be 0 (recursively).
+/// \pre IsFixedWidthLike(ArraySpan(mutable_array)) or the more restrictive
+///      is_fixed_width(*mutable_array->type) MUST be true
+/// \return The mutable pointer to the fixed-width byte blocks of the array. If
+///         pre-conditions are not satisfied, the return value is undefined.
+ARROW_EXPORT uint8_t* MutableFixedWidthValuesPointer(ArrayData* mutable_array);
+
+}  // namespace arrow::util
diff --git a/cpp/src/arrow/util/fixed_width_test.cc b/cpp/src/arrow/util/fixed_width_test.cc
new file mode 100644
index 0000000000000..3b35de1b6bbeb
--- /dev/null
+++ b/cpp/src/arrow/util/fixed_width_test.cc
@@ -0,0 +1,214 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// #include
+// #include
+
+#include <gtest/gtest.h>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/data.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/type.h"
+#include "arrow/util/fixed_width_internal.h"
+
+namespace arrow::util {
+
+namespace {
+bool NotBool(const DataType& type) { return type.id() != Type::BOOL; }
+bool NotInt32(const DataType& type) { return type.id() != Type::INT32; }
+}  // namespace
+
+class TestFixedWidth : public ::testing::Test {
+ protected:
+  std::shared_ptr<Array> bool_array_array_;
+  std::shared_ptr<Array> int_array_array_;
+  std::shared_ptr<Array> fsl_bool_array_;
+  std::shared_ptr<Array> fsl_int_array_;
+  std::shared_ptr<Array> fsl_int_nulls_array_;
+  std::shared_ptr<Array> fsl_int_inner_nulls_array_;
+  std::shared_ptr<Array> dict_string_array_;
+
+  std::shared_ptr<DataType> fsl(int32_t list_size,
+                                const std::shared_ptr<DataType>& value_type) {
+    return fixed_size_list(value_type, list_size);
+  }
+
+ public:
+  void SetUp() override {
+    bool_array_array_ = ArrayFromJSON(boolean(), "[true, false, null]");
+    int_array_array_ = ArrayFromJSON(int32(), "[1, 0, null]");
+    fsl_bool_array_ = ArrayFromJSON(fsl(2, boolean()), "[[true, false]]");
+    fsl_int_array_ = ArrayFromJSON(fsl(2, int32()), "[[1, 0], [2, 3]]");
+    fsl_int_nulls_array_ = ArrayFromJSON(fsl(2, int32()), "[[1, 0], null, [1, 2]]");
+    fsl_int_inner_nulls_array_ =
+        ArrayFromJSON(fsl(2, int32()), "[[1, 0], [2, 3], [null, 2]]");
+    dict_string_array_ =
+        ArrayFromJSON(dictionary(int32(), utf8()), R"(["Alice", "Bob", "Alice"])");
+  }
+};
+
+TEST_F(TestFixedWidth, IsFixedWidth) {
+  auto arr = ArraySpan{*bool_array_array_->data()};
+  // force_null_count doesn't matter because nulls at the top-level
+  // of the array are allowed by IsFixedWidthLike.
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/true));
+
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false, NotInt32));
+  ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/false, NotBool));
+
+  arr = ArraySpan{*int_array_array_->data()};
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/true));
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false, NotBool));
+}
+
+TEST_F(TestFixedWidth, IsFixedWidthLike) {
+  auto arr = ArraySpan{*fsl_bool_array_->data()};
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+
+  arr = ArraySpan{*fsl_int_array_->data()};
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+  arr.null_count = kUnknownNullCount;
+  // force_null_count=true isn't necessary because nulls at the top-level
+  // of the array are allowed by IsFixedWidthLike.
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+
+  arr.child_data[0].null_count = kUnknownNullCount;
+  // inner nulls are not allowed by IsFixedWidthLike...
+  ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+  // ...but forcing null counting on every internal array increases
+  // the chances of IsFixedWidthLike returning true.
+ ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/true)); + // Excluding INT32 from the internal array checks. + ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/true, NotInt32)); + + arr = ArraySpan{*fsl_int_nulls_array_->data()}; + // Nulls at the top-level of the array are allowed by IsFixedWidthLike. + // + // TODO(GH-10157): ArrayFromJSON uses FixedSizeListBuilder which currently + // produces nulls on the child data if one of the list-typed elements is null. + // ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false)); + + arr = ArraySpan{*fsl_int_inner_nulls_array_->data()}; + // Inner nulls are not allowed by IsFixedWidthLike. + ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/true)); + + arr = ArraySpan{*dict_string_array_->data()}; + // Dictionaries are considered fixed-width by is_fixed_width(), but excluded + // by IsFixedWidthLike if exclude_bool_and_dictionary=true. + ASSERT_TRUE(IsFixedWidthLike(arr)); + ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false, + /*exclude_bool_and_dictionary=*/false)); + ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/false, + /*exclude_bool_and_dictionary=*/true)); +} + +TEST_F(TestFixedWidth, MeasureWidthInBytes) { + auto b = boolean(); + auto i8 = int8(); + auto i32 = int32(); + auto fsb = fixed_size_binary(3); + auto dict = dictionary(int32(), utf8()); + auto varlen = utf8(); + ASSERT_EQ(FixedWidthInBytes(*b), -1); + ASSERT_EQ(FixedWidthInBytes(*i8), 1); + ASSERT_EQ(FixedWidthInBytes(*i32), 4); + ASSERT_EQ(FixedWidthInBytes(*fsb), 3); + ASSERT_EQ(FixedWidthInBytes(*dict), 4); + + ASSERT_EQ(FixedWidthInBytes(*varlen), -1); + ASSERT_EQ(FixedWidthInBytes(*varlen), -1); + + ASSERT_EQ(FixedWidthInBytes(*fsl(0, b)), -1); + ASSERT_EQ(FixedWidthInBytes(*fsl(3, b)), -1); + ASSERT_EQ(FixedWidthInBytes(*fsl(5, b)), -1); + + ASSERT_EQ(FixedWidthInBytes(*fsl(0, i8)), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(3, i8)), 3); + ASSERT_EQ(FixedWidthInBytes(*fsl(5, i8)), 5); + ASSERT_EQ(FixedWidthInBytes(*fsl(0, i32)), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(3, i32)), 3 * 4); + ASSERT_EQ(FixedWidthInBytes(*fsl(5, i32)), 5 * 4); + ASSERT_EQ(FixedWidthInBytes(*fsl(5, fsb)), 5 * 3); + ASSERT_EQ(FixedWidthInBytes(*fsl(5, dict)), 5 * 4); + + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(0, i8))), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(3, i8))), 2 * 3); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(5, i8))), 2 * 5); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(0, i32))), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(3, i32))), 2 * 3 * 4); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(5, i32))), 2 * 5 * 4); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(0, fsb))), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(3, fsb))), 2 * 3 * 3); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(5, fsb))), 2 * 5 * 3); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(0, dict))), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(3, dict))), 2 * 3 * 4); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(5, dict))), 2 * 5 * 4); + + ASSERT_EQ(FixedWidthInBytes(*fsl(0, varlen)), -1); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, varlen)), -1); +} + +TEST_F(TestFixedWidth, MeasureWidthInBits) { + auto b = boolean(); + auto i8 = int8(); + auto i32 = int32(); + auto fsb = fixed_size_binary(3); + auto dict = dictionary(int32(), utf8()); + auto varlen = utf8(); + ASSERT_EQ(FixedWidthInBits(*b), 1); + ASSERT_EQ(FixedWidthInBits(*i8), 8); + ASSERT_EQ(FixedWidthInBits(*i32), 4 * 8); + ASSERT_EQ(FixedWidthInBits(*fsb), 3 * 8); + ASSERT_EQ(FixedWidthInBits(*dict), 4 * 8); + + 
ASSERT_EQ(FixedWidthInBits(*varlen), -1); + ASSERT_EQ(FixedWidthInBits(*varlen), -1); + + ASSERT_EQ(FixedWidthInBits(*fsl(0, b)), 0); + ASSERT_EQ(FixedWidthInBits(*fsl(3, b)), 3); + ASSERT_EQ(FixedWidthInBits(*fsl(5, b)), 5); + + ASSERT_EQ(FixedWidthInBits(*fsl(0, i8)), 0); + ASSERT_EQ(FixedWidthInBits(*fsl(3, i8)), 3 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(5, i8)), 5 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(0, i32)), 0); + ASSERT_EQ(FixedWidthInBits(*fsl(3, i32)), 4 * 3 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(5, i32)), 4 * 5 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(5, fsb)), 5 * 3 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(5, dict)), 5 * 4 * 8); + + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(0, i8))), 0); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(3, i8))), 2 * 3 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(5, i8))), 2 * 5 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(0, i32))), 0); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(3, i32))), 2 * 3 * 4 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(5, i32))), 2 * 5 * 4 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(0, fsb))), 0); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(3, fsb))), 2 * 3 * 3 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(5, fsb))), 2 * 5 * 3 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(0, dict))), 0); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(3, dict))), 2 * 3 * 4 * 8); + ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(5, dict))), 2 * 5 * 4 * 8); + + ASSERT_EQ(FixedWidthInBits(*fsl(0, varlen)), -1); + ASSERT_EQ(FixedWidthInBits(*fsl(2, varlen)), -1); +} + +} // namespace arrow::util diff --git a/cpp/src/arrow/util/future.cc b/cpp/src/arrow/util/future.cc index a5426f949e721..60687172fe8d7 100644 --- a/cpp/src/arrow/util/future.cc +++ b/cpp/src/arrow/util/future.cc @@ -212,7 +212,7 @@ std::unique_ptr FutureImpl::Make() { std::unique_ptr FutureImpl::MakeFinished(FutureState state) { std::unique_ptr ptr(new ConcreteFutureImpl()); ptr->state_ = state; - return std::move(ptr); + return ptr; } FutureImpl::FutureImpl() : state_(FutureState::PENDING) {} diff --git a/cpp/src/arrow/util/future.h b/cpp/src/arrow/util/future.h index 283b581a5100a..0aa2842703712 100644 --- a/cpp/src/arrow/util/future.h +++ b/cpp/src/arrow/util/future.h @@ -871,7 +871,7 @@ Future ToFuture(Result maybe_value) { template Future ToFuture(Future fut) { - return std::move(fut); + return fut; } template diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index d48f9eb97d562..2eefe96f0d5c4 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -1084,7 +1084,7 @@ Result FileOpenReadable(const PlatformFilename& file_name) { } #endif - return std::move(fd); + return fd; } Result FileOpenWritable(const PlatformFilename& file_name, @@ -1148,7 +1148,7 @@ Result FileOpenWritable(const PlatformFilename& file_name, // Seek to end, as O_APPEND does not necessarily do it RETURN_NOT_OK(lseek64_compat(fd.fd(), 0, SEEK_END)); } - return std::move(fd); + return fd; } Result FileTell(int fd) { @@ -1967,7 +1967,7 @@ Result> TemporaryDir::Make(const std::string& pref for (const auto& base_dir : base_dirs) { ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir)); if (ptr) { - return std::move(ptr); + return ptr; } // Cannot create in this directory, try the next one } diff --git a/cpp/src/arrow/util/iterator.h b/cpp/src/arrow/util/iterator.h index 5e716d0fd113d..5025799b9a372 100644 --- a/cpp/src/arrow/util/iterator.h +++ b/cpp/src/arrow/util/iterator.h @@ -105,9 +105,18 @@ class Iterator : public util::EqualityComparable> { Iterator() : 
ptr_(NULLPTR, [](void*) {}) {} /// \brief Return the next element of the sequence, IterationTraits::End() when the - /// iteration is completed. Calling this on a default constructed Iterator - /// will result in undefined behavior. - Result Next() { return next_(ptr_.get()); } + /// iteration is completed. + Result Next() { + if (ptr_) { + auto next_result = next_(ptr_.get()); + if (next_result.ok() && IsIterationEnd(next_result.ValueUnsafe())) { + ptr_.reset(NULLPTR); + } + return next_result; + } else { + return IterationTraits::End(); + } + } /// Pass each element of the sequence to a visitor. Will return any error status /// returned by the visitor, terminating iteration. @@ -180,9 +189,7 @@ class Iterator : public util::EqualityComparable> { ARROW_ASSIGN_OR_RAISE(auto element, maybe_element); out.push_back(std::move(element)); } - // ARROW-8193: On gcc-4.8 without the explicit move it tries to use the - // copy constructor, which may be deleted on the elements of type T - return std::move(out); + return out; } private: diff --git a/cpp/src/arrow/util/iterator_test.cc b/cpp/src/arrow/util/iterator_test.cc index ba21ddcced209..a247ba13aef73 100644 --- a/cpp/src/arrow/util/iterator_test.cc +++ b/cpp/src/arrow/util/iterator_test.cc @@ -146,6 +146,49 @@ void AssertIteratorNext(T expected, Iterator& it) { ASSERT_EQ(expected, actual); } +template +class DeleteDetectableIterator { + public: + explicit DeleteDetectableIterator(std::vector values, bool* deleted) + : values_(std::move(values)), i_(0), deleted_(deleted) {} + + DeleteDetectableIterator(DeleteDetectableIterator&& source) + : values_(std::move(source.values_)), i_(source.i_), deleted_(source.deleted_) { + source.deleted_ = nullptr; + } + + ~DeleteDetectableIterator() { + if (deleted_) { + *deleted_ = true; + } + } + + Result Next() { + if (i_ == values_.size()) { + return IterationTraits::End(); + } + return std::move(values_[i_++]); + } + + private: + std::vector values_; + size_t i_; + bool* deleted_; +}; + +// Generic iterator tests + +TEST(TestIterator, DeleteOnEnd) { + bool deleted = false; + Iterator it(DeleteDetectableIterator({1}, &deleted)); + ASSERT_FALSE(deleted); + AssertIteratorNext({1}, it); + ASSERT_FALSE(deleted); + ASSERT_OK_AND_ASSIGN(auto value, it.Next()); + ASSERT_TRUE(IsIterationEnd(value)); + ASSERT_TRUE(deleted); +} + // -------------------------------------------------------------------- // Synchronous iterator tests diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index d80828869b33c..484df3400d92d 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -67,7 +67,11 @@ // [5] J. Doerfert et al. 2019. "Performance Exploration Through Optimistic Static // Program Annotations". 
https://github.com/jdoerfert/PETOSPA/blob/master/ISC19.pdf #define ARROW_UNUSED(x) (void)(x) +#ifdef ARROW_WARN_DOCUMENTATION +#define ARROW_ARG_UNUSED(x) x +#else #define ARROW_ARG_UNUSED(x) +#endif #if defined(__GNUC__) // GCC and compatible compilers (clang, Intel ICC) #define ARROW_NORETURN __attribute__((noreturn)) #define ARROW_NOINLINE __attribute__((noinline)) @@ -98,7 +102,7 @@ #elif defined(_MSC_VER) // MSVC #define ARROW_NORETURN __declspec(noreturn) #define ARROW_NOINLINE __declspec(noinline) -#define ARROW_FORCE_INLINE __declspec(forceinline) +#define ARROW_FORCE_INLINE __forceinline #define ARROW_PREDICT_FALSE(x) (x) #define ARROW_PREDICT_TRUE(x) (x) #define ARROW_PREFETCH(addr) diff --git a/cpp/src/arrow/util/vector.h b/cpp/src/arrow/util/vector.h index e3c0a67cf46c4..74b6a2403a2bb 100644 --- a/cpp/src/arrow/util/vector.h +++ b/cpp/src/arrow/util/vector.h @@ -113,7 +113,7 @@ Result> MaybeMapVector(Fn&& map, const std::vector& source out.reserve(source.size()); ARROW_RETURN_NOT_OK(MaybeTransform(source.begin(), source.end(), std::back_inserter(out), std::forward(map))); - return std::move(out); + return out; } template , @@ -152,7 +152,7 @@ Result> UnwrapOrRaise(std::vector>&& results) { } out.push_back(it->MoveValueUnsafe()); } - return std::move(out); + return out; } template @@ -165,7 +165,7 @@ Result> UnwrapOrRaise(const std::vector>& results) { } out.push_back(result.ValueUnsafe()); } - return std::move(out); + return out; } } // namespace internal diff --git a/cpp/src/gandiva/cache.cc b/cpp/src/gandiva/cache.cc index a1333ccdc5d43..2358b08c82424 100644 --- a/cpp/src/gandiva/cache.cc +++ b/cpp/src/gandiva/cache.cc @@ -20,26 +20,41 @@ #include "arrow/result.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" +#include "arrow/util/value_parsing.h" namespace gandiva { -static const size_t DEFAULT_CACHE_SIZE = 5000; - -int GetCapacity() { - size_t capacity = DEFAULT_CACHE_SIZE; - auto maybe_env_cache_size = ::arrow::internal::GetEnvVar("GANDIVA_CACHE_SIZE"); - if (maybe_env_cache_size.ok()) { - const auto env_cache_size = *std::move(maybe_env_cache_size); - if (!env_cache_size.empty()) { - capacity = std::atol(env_cache_size.c_str()); - if (capacity <= 0) { - ARROW_LOG(WARNING) << "Invalid cache size provided in GANDIVA_CACHE_SIZE. " - << "Using default cache size: " << DEFAULT_CACHE_SIZE; - capacity = DEFAULT_CACHE_SIZE; - } - } +constexpr auto kCacheCapacityEnvVar = "GANDIVA_CACHE_SIZE"; +constexpr auto kDefaultCacheSize = 5000; + +namespace internal { +int GetCacheCapacityFromEnvVar() { + auto maybe_env_value = ::arrow::internal::GetEnvVar(kCacheCapacityEnvVar); + if (!maybe_env_value.ok()) { + return kDefaultCacheSize; + } + const auto env_value = *std::move(maybe_env_value); + if (env_value.empty()) { + return kDefaultCacheSize; + } + int capacity = 0; + bool ok = ::arrow::internal::ParseValue<::arrow::Int32Type>( + env_value.c_str(), env_value.size(), &capacity); + if (!ok || capacity <= 0) { + ARROW_LOG(WARNING) << "Invalid cache size provided in " << kCacheCapacityEnvVar + << ". Using default cache size: " << kDefaultCacheSize; + return kDefaultCacheSize; } - return static_cast(capacity); + return capacity; +} +} // namespace internal + +// Deprecated in 17.0.0. Use GetCacheCapacity instead. 
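One behavioral consequence of the new memoization is worth spelling out: GetCacheCapacity() latches the first parse of GANDIVA_CACHE_SIZE in a function-local static, so the environment variable only takes effect if it is set before the first expression cache is created. A minimal sketch of the implication (not part of the diff; setenv is POSIX-only, so this assumes a POSIX system):

#include <cassert>
#include <cstdlib>

#include "gandiva/cache.h"

void CacheCapacityIsReadOnce() {
  // The first call parses GANDIVA_CACHE_SIZE and caches the result...
  const int capacity = gandiva::GetCacheCapacity();
  // ...so later changes to the environment are ignored for this process.
  setenv("GANDIVA_CACHE_SIZE", "123", /*overwrite=*/1);
  assert(gandiva::GetCacheCapacity() == capacity);
}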
+int GetCapacity() { return GetCacheCapacity(); } + +int GetCacheCapacity() { + static const int capacity = internal::GetCacheCapacityFromEnvVar(); + return capacity; } void LogCacheSize(size_t capacity) { diff --git a/cpp/src/gandiva/cache.h b/cpp/src/gandiva/cache.h index 7cff9b02692ae..c19dbb7a0e30e 100644 --- a/cpp/src/gandiva/cache.h +++ b/cpp/src/gandiva/cache.h @@ -20,14 +20,27 @@ #include #include +#include "arrow/util/macros.h" #include "gandiva/lru_cache.h" #include "gandiva/visibility.h" namespace gandiva { +namespace internal { +// Only called once by GetCacheCapacity(). +// Do the actual work of getting the cache capacity from env var. +// Also makes the testing easier. +GANDIVA_EXPORT +int GetCacheCapacityFromEnvVar(); +} // namespace internal + +ARROW_DEPRECATED("Deprecated in 17.0.0. Use GetCacheCapacity instead.") GANDIVA_EXPORT int GetCapacity(); +GANDIVA_EXPORT +int GetCacheCapacity(); + GANDIVA_EXPORT void LogCacheSize(size_t capacity); @@ -36,7 +49,7 @@ class Cache { public: explicit Cache(size_t capacity) : cache_(capacity) { LogCacheSize(capacity); } - Cache() : Cache(GetCapacity()) {} + Cache() : Cache(GetCacheCapacity()) {} ValueType GetObjectCode(const KeyType& cache_key) { std::optional result; diff --git a/cpp/src/gandiva/cache_test.cc b/cpp/src/gandiva/cache_test.cc index a146707079fa6..96cf4a12e587a 100644 --- a/cpp/src/gandiva/cache_test.cc +++ b/cpp/src/gandiva/cache_test.cc @@ -16,10 +16,14 @@ // under the License. #include "gandiva/cache.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/io_util.h" +#include "arrow/util/logging.h" #include namespace gandiva { + class TestCacheKey { public: explicit TestCacheKey(int value) : value_(value) {} @@ -38,5 +42,67 @@ TEST(TestCache, TestGetPut) { ASSERT_EQ(cache.GetObjectCode(TestCacheKey(2)), "world"); } -TEST(TestCache, TestGetCacheCapacity) { ASSERT_EQ(GetCapacity(), 5000); } +namespace { +constexpr auto cache_capacity_env_var = "GANDIVA_CACHE_SIZE"; +constexpr auto default_cache_capacity = 5000; +} // namespace + +TEST(TestCache, TestGetCacheCapacityDefault) { + ASSERT_EQ(GetCacheCapacity(), default_cache_capacity); +} + +TEST(TestCache, TestGetCacheCapacityEnvVar) { + using ::arrow::EnvVarGuard; + + // Empty. + { + EnvVarGuard guard(cache_capacity_env_var, ""); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Non-number. + { + EnvVarGuard guard(cache_capacity_env_var, "invalid"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Number with invalid suffix. + { + EnvVarGuard guard(cache_capacity_env_var, "42MB"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Valid positive number. + { + EnvVarGuard guard(cache_capacity_env_var, "42"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), 42); + } + + // Int max. + { + auto str = std::to_string(std::numeric_limits::max()); + EnvVarGuard guard(cache_capacity_env_var, str.c_str()); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), std::numeric_limits::max()); + } + + // Zero. + { + EnvVarGuard guard(cache_capacity_env_var, "0"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Negative number. + { + EnvVarGuard guard(cache_capacity_env_var, "-1"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Over int max. 
+ { + auto str = std::to_string(static_cast(std::numeric_limits::max()) + 1); + EnvVarGuard guard(cache_capacity_env_var, str.c_str()); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } +} + } // namespace gandiva diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 2e392630ee009..0955a2e47fcaf 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -147,7 +147,7 @@ arrow::Result> MakeDefaultFunctionRegistry() { ARROW_RETURN_NOT_OK(registry->Add(func_signature)); } } - return std::move(registry); + return registry; } std::shared_ptr default_function_registry() { diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index 62ebab08f4d6b..4afa2935ace33 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -1251,7 +1251,7 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, // Make the function call auto out = decimalIR.CallDecimalFunction(func->pc_name(), llvm_return_type, *params); ret_lvalue->set_data(out); - return std::move(ret_lvalue); + return ret_lvalue; } else { bool isDecimalFunction = false; for (auto& arg : *params) { diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc index 5aa0eb38eafd7..3849cf7bdf9a5 100644 --- a/cpp/src/gandiva/precompiled/string_ops.cc +++ b/cpp/src/gandiva/precompiled/string_ops.cc @@ -1377,7 +1377,7 @@ gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len) { if (data_len == 0) { return 0; } - return static_cast(data[0]); + return static_cast(static_cast(data[0])); } // Returns the ASCII character having the binary equivalent to A. diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc index 89213592e7ea2..aaa25db0a9f8d 100644 --- a/cpp/src/gandiva/precompiled/string_ops_test.cc +++ b/cpp/src/gandiva/precompiled/string_ops_test.cc @@ -51,6 +51,8 @@ TEST(TestStringOps, TestAscii) { EXPECT_EQ(ascii_utf8("", 0), 0); EXPECT_EQ(ascii_utf8("123", 3), 49); EXPECT_EQ(ascii_utf8("999", 3), 57); + EXPECT_EQ(ascii_utf8("\x80", 1), -128); + EXPECT_EQ(ascii_utf8("\xFF", 1), -1); } TEST(TestStringOps, TestChrBigInt) { diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index 8932c4a4f8d19..1ba0c4626233f 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -640,128 +640,6 @@ std::ostream& operator<<(std::ostream& out, const SizeStatistics& obj) } -uint32_t SizeStatistics::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->unencoded_byte_array_data_bytes); - this->__isset.unencoded_byte_array_data_bytes = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->repetition_level_histogram.clear(); - uint32_t _size0; - ::apache::thrift::protocol::TType _etype3; - xfer += iprot->readListBegin(_etype3, 
-            this->repetition_level_histogram.resize(_size0);
-            uint32_t _i4;
-            for (_i4 = 0; _i4 < _size0; ++_i4)
-            {
-              xfer += iprot->readI64(this->repetition_level_histogram[_i4]);
-            }
-            xfer += iprot->readListEnd();
-          }
-          this->__isset.repetition_level_histogram = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->definition_level_histogram.clear();
-            uint32_t _size5;
-            ::apache::thrift::protocol::TType _etype8;
-            xfer += iprot->readListBegin(_etype8, _size5);
-            this->definition_level_histogram.resize(_size5);
-            uint32_t _i9;
-            for (_i9 = 0; _i9 < _size5; ++_i9)
-            {
-              xfer += iprot->readI64(this->definition_level_histogram[_i9]);
-            }
-            xfer += iprot->readListEnd();
-          }
-          this->__isset.definition_level_histogram = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  return xfer;
-}
-
-uint32_t SizeStatistics::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
-  xfer += oprot->writeStructBegin("SizeStatistics");
-
-  if (this->__isset.unencoded_byte_array_data_bytes) {
-    xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_I64, 1);
-    xfer += oprot->writeI64(this->unencoded_byte_array_data_bytes);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.repetition_level_histogram) {
-    xfer += oprot->writeFieldBegin("repetition_level_histogram", ::apache::thrift::protocol::T_LIST, 2);
-    {
-      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->repetition_level_histogram.size()));
-      std::vector<int64_t> ::const_iterator _iter10;
-      for (_iter10 = this->repetition_level_histogram.begin(); _iter10 != this->repetition_level_histogram.end(); ++_iter10)
-      {
-        xfer += oprot->writeI64((*_iter10));
-      }
-      xfer += oprot->writeListEnd();
-    }
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.definition_level_histogram) {
-    xfer += oprot->writeFieldBegin("definition_level_histogram", ::apache::thrift::protocol::T_LIST, 3);
-    {
-      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->definition_level_histogram.size()));
-      std::vector<int64_t> ::const_iterator _iter11;
-      for (_iter11 = this->definition_level_histogram.begin(); _iter11 != this->definition_level_histogram.end(); ++_iter11)
-      {
-        xfer += oprot->writeI64((*_iter11));
-      }
-      xfer += oprot->writeListEnd();
-    }
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
 void swap(SizeStatistics &a, SizeStatistics &b) {
   using ::std::swap;
   swap(a.unencoded_byte_array_data_bytes, b.unencoded_byte_array_data_bytes);
@@ -856,153 +734,6 @@ std::ostream& operator<<(std::ostream& out, const Statistics& obj)
 }
 
 
-uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_STRING) {
-          xfer += 
iprot->readBinary(this->max); - this->__isset.max = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min); - this->__isset.min = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->null_count); - this->__isset.null_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->distinct_count); - this->__isset.distinct_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max_value); - this->__isset.max_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min_value); - this->__isset.min_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_max_value_exact); - this->__isset.is_max_value_exact = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_min_value_exact); - this->__isset.is_min_value_exact = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Statistics"); - - if (this->__isset.max) { - xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->max); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min) { - xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->min); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.null_count) { - xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->null_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.distinct_count) { - xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->distinct_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.max_value) { - xfer += oprot->writeFieldBegin("max_value", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeBinary(this->max_value); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min_value) { - xfer += oprot->writeFieldBegin("min_value", ::apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeBinary(this->min_value); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.is_max_value_exact) { - xfer += oprot->writeFieldBegin("is_max_value_exact", ::apache::thrift::protocol::T_BOOL, 7); - xfer += oprot->writeBool(this->is_max_value_exact); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.is_min_value_exact) { - xfer += oprot->writeFieldBegin("is_min_value_exact", ::apache::thrift::protocol::T_BOOL, 8); - xfer += oprot->writeBool(this->is_min_value_exact); - xfer += oprot->writeFieldEnd(); 
- } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(Statistics &a, Statistics &b) { using ::std::swap; swap(a.max, b.max); @@ -1087,44 +818,6 @@ std::ostream& operator<<(std::ostream& out, const StringType& obj) } -uint32_t StringType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t StringType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("StringType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(StringType &a, StringType &b) { using ::std::swap; (void) a; @@ -1162,44 +855,6 @@ std::ostream& operator<<(std::ostream& out, const UUIDType& obj) } -uint32_t UUIDType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t UUIDType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("UUIDType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(UUIDType &a, UUIDType &b) { using ::std::swap; (void) a; @@ -1237,44 +892,6 @@ std::ostream& operator<<(std::ostream& out, const MapType& obj) } -uint32_t MapType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MapType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MapType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(MapType &a, MapType &b) { using ::std::swap; (void) a; @@ -1312,44 +929,6 @@ std::ostream& operator<<(std::ostream& out, const ListType& obj) } -uint32_t 
ListType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ListType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ListType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ListType &a, ListType &b) { using ::std::swap; (void) a; @@ -1387,44 +966,6 @@ std::ostream& operator<<(std::ostream& out, const EnumType& obj) } -uint32_t EnumType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EnumType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EnumType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(EnumType &a, EnumType &b) { using ::std::swap; (void) a; @@ -1462,44 +1003,6 @@ std::ostream& operator<<(std::ostream& out, const DateType& obj) } -uint32_t DateType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t DateType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DateType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DateType &a, DateType &b) { using ::std::swap; (void) a; @@ -1537,44 +1040,6 @@ std::ostream& operator<<(std::ostream& out, const Float16Type& obj) } -uint32_t Float16Type::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using 
::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t Float16Type::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Float16Type"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(Float16Type &a, Float16Type &b) { using ::std::swap; (void) a; @@ -1612,44 +1077,6 @@ std::ostream& operator<<(std::ostream& out, const NullType& obj) } -uint32_t NullType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NullType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NullType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(NullType &a, NullType &b) { using ::std::swap; (void) a; @@ -1695,79 +1122,6 @@ std::ostream& operator<<(std::ostream& out, const DecimalType& obj) } -uint32_t DecimalType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_scale = false; - bool isset_precision = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - isset_scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - isset_precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_scale) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_precision) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DecimalType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DecimalType"); - - xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("precision", 
::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DecimalType &a, DecimalType &b) { using ::std::swap; swap(a.scale, b.scale); @@ -1811,44 +1165,6 @@ std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) } -uint32_t MilliSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MilliSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MilliSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(MilliSeconds &a, MilliSeconds &b) { using ::std::swap; (void) a; @@ -1886,44 +1202,6 @@ std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj) } -uint32_t MicroSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MicroSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MicroSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(MicroSeconds &a, MicroSeconds &b) { using ::std::swap; (void) a; @@ -1961,44 +1239,6 @@ std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) } -uint32_t NanoSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NanoSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NanoSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(NanoSeconds &a, 
NanoSeconds &b) { using ::std::swap; (void) a; @@ -2051,88 +1291,6 @@ std::ostream& operator<<(std::ostream& out, const TimeUnit& obj) } -uint32_t TimeUnit::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->MILLIS.read(iprot); - this->__isset.MILLIS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->MICROS.read(iprot); - this->__isset.MICROS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->NANOS.read(iprot); - this->__isset.NANOS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TimeUnit::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeUnit"); - - if (this->__isset.MILLIS) { - xfer += oprot->writeFieldBegin("MILLIS", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->MILLIS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MICROS) { - xfer += oprot->writeFieldBegin("MICROS", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MICROS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.NANOS) { - xfer += oprot->writeFieldBegin("NANOS", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->NANOS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TimeUnit &a, TimeUnit &b) { using ::std::swap; swap(a.MILLIS, b.MILLIS); @@ -2195,79 +1353,6 @@ std::ostream& operator<<(std::ostream& out, const TimestampType& obj) } -uint32_t TimestampType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw 
TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimestampType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimestampType"); - - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TimestampType &a, TimestampType &b) { using ::std::swap; swap(a.isAdjustedToUTC, b.isAdjustedToUTC); @@ -2319,79 +1404,6 @@ std::ostream& operator<<(std::ostream& out, const TimeType& obj) } -uint32_t TimeType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimeType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeType"); - - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TimeType &a, TimeType &b) { using ::std::swap; swap(a.isAdjustedToUTC, b.isAdjustedToUTC); @@ -2443,79 +1455,6 @@ std::ostream& operator<<(std::ostream& out, const IntType& obj) } -uint32_t IntType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_bitWidth = false; - bool isset_isSigned = false; - - while (true) - { - xfer += 
iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_BYTE) { - xfer += iprot->readByte(this->bitWidth); - isset_bitWidth = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isSigned); - isset_isSigned = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_bitWidth) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_isSigned) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t IntType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IntType"); - - xfer += oprot->writeFieldBegin("bitWidth", ::apache::thrift::protocol::T_BYTE, 1); - xfer += oprot->writeByte(this->bitWidth); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("isSigned", ::apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->isSigned); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(IntType &a, IntType &b) { using ::std::swap; swap(a.bitWidth, b.bitWidth); @@ -2559,44 +1498,6 @@ std::ostream& operator<<(std::ostream& out, const JsonType& obj) } -uint32_t JsonType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t JsonType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("JsonType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(JsonType &a, JsonType &b) { using ::std::swap; (void) a; @@ -2634,44 +1535,6 @@ std::ostream& operator<<(std::ostream& out, const BsonType& obj) } -uint32_t BsonType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BsonType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BsonType"); - - xfer += 
oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BsonType &a, BsonType &b) { using ::std::swap; (void) a; @@ -2779,231 +1642,6 @@ std::ostream& operator<<(std::ostream& out, const LogicalType& obj) } -uint32_t LogicalType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->STRING.read(iprot); - this->__isset.STRING = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->MAP.read(iprot); - this->__isset.MAP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->LIST.read(iprot); - this->__isset.LIST = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->ENUM.read(iprot); - this->__isset.ENUM = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->DECIMAL.read(iprot); - this->__isset.DECIMAL = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->DATE.read(iprot); - this->__isset.DATE = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TIME.read(iprot); - this->__isset.TIME = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TIMESTAMP.read(iprot); - this->__isset.TIMESTAMP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->INTEGER.read(iprot); - this->__isset.INTEGER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UNKNOWN.read(iprot); - this->__isset.UNKNOWN = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->JSON.read(iprot); - this->__isset.JSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->BSON.read(iprot); - this->__isset.BSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 14: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UUID.read(iprot); - this->__isset.UUID = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 15: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->FLOAT16.read(iprot); - this->__isset.FLOAT16 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t 
LogicalType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("LogicalType"); - - if (this->__isset.STRING) { - xfer += oprot->writeFieldBegin("STRING", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->STRING.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MAP) { - xfer += oprot->writeFieldBegin("MAP", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MAP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.LIST) { - xfer += oprot->writeFieldBegin("LIST", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->LIST.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENUM) { - xfer += oprot->writeFieldBegin("ENUM", ::apache::thrift::protocol::T_STRUCT, 4); - xfer += this->ENUM.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DECIMAL) { - xfer += oprot->writeFieldBegin("DECIMAL", ::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->DECIMAL.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DATE) { - xfer += oprot->writeFieldBegin("DATE", ::apache::thrift::protocol::T_STRUCT, 6); - xfer += this->DATE.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIME) { - xfer += oprot->writeFieldBegin("TIME", ::apache::thrift::protocol::T_STRUCT, 7); - xfer += this->TIME.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIMESTAMP) { - xfer += oprot->writeFieldBegin("TIMESTAMP", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->TIMESTAMP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.INTEGER) { - xfer += oprot->writeFieldBegin("INTEGER", ::apache::thrift::protocol::T_STRUCT, 10); - xfer += this->INTEGER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UNKNOWN) { - xfer += oprot->writeFieldBegin("UNKNOWN", ::apache::thrift::protocol::T_STRUCT, 11); - xfer += this->UNKNOWN.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.JSON) { - xfer += oprot->writeFieldBegin("JSON", ::apache::thrift::protocol::T_STRUCT, 12); - xfer += this->JSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.BSON) { - xfer += oprot->writeFieldBegin("BSON", ::apache::thrift::protocol::T_STRUCT, 13); - xfer += this->BSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UUID) { - xfer += oprot->writeFieldBegin("UUID", ::apache::thrift::protocol::T_STRUCT, 14); - xfer += this->UUID.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.FLOAT16) { - xfer += oprot->writeFieldBegin("FLOAT16", ::apache::thrift::protocol::T_STRUCT, 15); - xfer += this->FLOAT16.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(LogicalType &a, LogicalType &b) { using ::std::swap; swap(a.STRING, b.STRING); @@ -3173,187 +1811,6 @@ std::ostream& operator<<(std::ostream& out, const SchemaElement& obj) } -uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_name = false; - - while (true) - { - xfer += 
iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast96; - xfer += iprot->readI32(ecast96); - this->type = static_cast(ecast96); - this->__isset.type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->type_length); - this->__isset.type_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast97; - xfer += iprot->readI32(ecast97); - this->repetition_type = static_cast(ecast97); - this->__isset.repetition_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->name); - isset_name = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_children); - this->__isset.num_children = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast98; - xfer += iprot->readI32(ecast98); - this->converted_type = static_cast(ecast98); - this->__isset.converted_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - this->__isset.scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - this->__isset.precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->field_id); - this->__isset.field_id = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->logicalType.read(iprot); - this->__isset.logicalType = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_name) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SchemaElement::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SchemaElement"); - - if (this->__isset.type) { - xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast(this->type)); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.type_length) { - xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->type_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_type) { - xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(static_cast(this->repetition_type)); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4); - xfer += oprot->writeString(this->name); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.num_children) { - xfer += 
oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->num_children); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.converted_type) { - xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(static_cast(this->converted_type)); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.scale) { - xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.precision) { - xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.field_id) { - xfer += oprot->writeFieldBegin("field_id", ::apache::thrift::protocol::T_I32, 9); - xfer += oprot->writeI32(this->field_id); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.logicalType) { - xfer += oprot->writeFieldBegin("logicalType", ::apache::thrift::protocol::T_STRUCT, 10); - xfer += this->logicalType.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(SchemaElement &a, SchemaElement &b) { using ::std::swap; swap(a.type, b.type); @@ -3471,128 +1928,6 @@ std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj) } -uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - bool isset_definition_level_encoding = false; - bool isset_repetition_level_encoding = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast103; - xfer += iprot->readI32(ecast103); - this->encoding = static_cast(ecast103); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast104; - xfer += iprot->readI32(ecast104); - this->definition_level_encoding = static_cast(ecast104); - isset_definition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast105; - xfer += iprot->readI32(ecast105); - this->repetition_level_encoding = static_cast(ecast105); - isset_repetition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw 
TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeader"); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(static_cast(this->definition_level_encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast(this->repetition_level_encoding)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DataPageHeader &a, DataPageHeader &b) { using ::std::swap; swap(a.num_values, b.num_values); @@ -3659,44 +1994,6 @@ std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) } -uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t IndexPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IndexPageHeader"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(IndexPageHeader &a, IndexPageHeader &b) { using ::std::swap; (void) a; @@ -3747,94 +2044,6 @@ std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj) } -uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += 
iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast114; - xfer += iprot->readI32(ecast114); - this->encoding = static_cast(ecast114); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_sorted); - this->__isset.is_sorted = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DictionaryPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DictionaryPageHeader"); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_sorted) { - xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->is_sorted); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { using ::std::swap; swap(a.num_values, b.num_values); @@ -3923,167 +2132,6 @@ std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj) } -uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_num_nulls = false; - bool isset_num_rows = false; - bool isset_encoding = false; - bool isset_definition_levels_byte_length = false; - bool isset_repetition_levels_byte_length = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_nulls); - isset_num_nulls = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast119; - xfer += iprot->readI32(ecast119); - this->encoding = static_cast(ecast119); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == 
::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->definition_levels_byte_length); - isset_definition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->repetition_levels_byte_length); - isset_repetition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_compressed); - this->__isset.is_compressed = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_nulls) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeaderV2::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeaderV2"); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->num_nulls); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->definition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(this->repetition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_compressed) { - xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7); - xfer += oprot->writeBool(this->is_compressed); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { using ::std::swap; swap(a.num_values, b.num_values); @@ -4168,44 +2216,6 @@ std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& 
obj) } -uint32_t SplitBlockAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t SplitBlockAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SplitBlockAlgorithm"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) { using ::std::swap; (void) a; @@ -4248,62 +2258,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj) } -uint32_t BloomFilterAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->BLOCK.read(iprot); - this->__isset.BLOCK = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterAlgorithm"); - - if (this->__isset.BLOCK) { - xfer += oprot->writeFieldBegin("BLOCK", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->BLOCK.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) { using ::std::swap; swap(a.BLOCK, b.BLOCK); @@ -4346,44 +2300,6 @@ std::ostream& operator<<(std::ostream& out, const XxHash& obj) } -uint32_t XxHash::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t XxHash::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("XxHash"); - - xfer 
+= oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(XxHash &a, XxHash &b) { using ::std::swap; (void) a; @@ -4426,62 +2342,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj) } -uint32_t BloomFilterHash::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->XXHASH.read(iprot); - this->__isset.XXHASH = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterHash::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterHash"); - - if (this->__isset.XXHASH) { - xfer += oprot->writeFieldBegin("XXHASH", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->XXHASH.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterHash &a, BloomFilterHash &b) { using ::std::swap; swap(a.XXHASH, b.XXHASH); @@ -4511,57 +2371,19 @@ void BloomFilterHash::printTo(std::ostream& out) const { out << "BloomFilterHash("; out << "XXHASH="; (__isset.XXHASH ? 
(out << to_string(XXHASH)) : (out << "")); out << ")"; -} - - -Uncompressed::~Uncompressed() noexcept { -} - -std::ostream& operator<<(std::ostream& out, const Uncompressed& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t Uncompressed::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +} - xfer += iprot->readStructEnd(); - return xfer; +Uncompressed::~Uncompressed() noexcept { } -uint32_t Uncompressed::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Uncompressed"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; +std::ostream& operator<<(std::ostream& out, const Uncompressed& obj) +{ + obj.printTo(out); + return out; } + void swap(Uncompressed &a, Uncompressed &b) { using ::std::swap; (void) a; @@ -4604,62 +2426,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj) } -uint32_t BloomFilterCompression::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UNCOMPRESSED.read(iprot); - this->__isset.UNCOMPRESSED = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterCompression::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterCompression"); - - if (this->__isset.UNCOMPRESSED) { - xfer += oprot->writeFieldBegin("UNCOMPRESSED", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->UNCOMPRESSED.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterCompression &a, BloomFilterCompression &b) { using ::std::swap; swap(a.UNCOMPRESSED, b.UNCOMPRESSED); @@ -4718,109 +2484,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj) } -uint32_t BloomFilterHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_numBytes = false; - bool isset_algorithm = false; - bool 
isset_hash = false; - bool isset_compression = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->numBytes); - isset_numBytes = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->algorithm.read(iprot); - isset_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->hash.read(iprot); - isset_hash = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->compression.read(iprot); - isset_compression = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_numBytes) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_hash) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compression) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t BloomFilterHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterHeader"); - - xfer += oprot->writeFieldBegin("numBytes", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->numBytes); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("algorithm", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("hash", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->hash.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compression", ::apache::thrift::protocol::T_STRUCT, 4); - xfer += this->compression.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterHeader &a, BloomFilterHeader &b) { using ::std::swap; swap(a.numBytes, b.numBytes); @@ -4913,161 +2576,6 @@ std::ostream& operator<<(std::ostream& out, const PageHeader& obj) } -uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_type = false; - bool isset_uncompressed_page_size = false; - bool isset_compressed_page_size = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast152; - xfer += iprot->readI32(ecast152); - this->type = static_cast<PageType::type>(ecast152); - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer +=
iprot->readI32(this->uncompressed_page_size); - isset_uncompressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->crc); - this->__isset.crc = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header.read(iprot); - this->__isset.data_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->index_page_header.read(iprot); - this->__isset.index_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->dictionary_page_header.read(iprot); - this->__isset.dictionary_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header_v2.read(iprot); - this->__isset.data_page_header_v2 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_uncompressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageHeader"); - - xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast<int32_t>(this->type)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->uncompressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.crc) { - xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(this->crc); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header) { - xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->data_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.index_page_header) { - xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6); - xfer += this->index_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_header) { - xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7); - xfer += this->dictionary_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header_v2) { - xfer += oprot->writeFieldBegin("data_page_header_v2",
::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->data_page_header_v2.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(PageHeader &a, PageHeader &b) { using ::std::swap; swap(a.type, b.type); @@ -5161,77 +2669,6 @@ std::ostream& operator<<(std::ostream& out, const KeyValue& obj) } -uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_key = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->key); - isset_key = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->value); - this->__isset.value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_key) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("KeyValue"); - - xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->key); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.value) { - xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeString(this->value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(KeyValue &a, KeyValue &b) { using ::std::swap; swap(a.key, b.key); @@ -5292,94 +2729,6 @@ std::ostream& operator<<(std::ostream& out, const SortingColumn& obj) } -uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_column_idx = false; - bool isset_descending = false; - bool isset_nulls_first = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_idx); - isset_column_idx = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->descending); - isset_descending = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->nulls_first); - isset_nulls_first = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - 
xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_column_idx) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_descending) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_nulls_first) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SortingColumn::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SortingColumn"); - - xfer += oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->column_idx); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->descending); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->nulls_first); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(SortingColumn &a, SortingColumn &b) { using ::std::swap; swap(a.column_idx, b.column_idx); @@ -5441,98 +2790,6 @@ std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj) } -uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_page_type = false; - bool isset_encoding = false; - bool isset_count = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast165; - xfer += iprot->readI32(ecast165); - this->page_type = static_cast<PageType::type>(ecast165); - isset_page_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast166; - xfer += iprot->readI32(ecast166); - this->encoding = static_cast<Encoding::type>(ecast166); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->count); - isset_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_page_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_count) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageEncodingStats::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageEncodingStats"); - - xfer += oprot->writeFieldBegin("page_type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast<int32_t>(this->page_type)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding",
::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast<int32_t>(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("count", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->count); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(PageEncodingStats &a, PageEncodingStats &b) { using ::std::swap; swap(a.page_type, b.page_type); @@ -5654,359 +2911,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) } -uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_type = false; - bool isset_encodings = false; - bool isset_path_in_schema = false; - bool isset_codec = false; - bool isset_num_values = false; - bool isset_total_uncompressed_size = false; - bool isset_total_compressed_size = false; - bool isset_data_page_offset = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast171; - xfer += iprot->readI32(ecast171); - this->type = static_cast<Type::type>(ecast171); - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->encodings.clear(); - uint32_t _size172; - ::apache::thrift::protocol::TType _etype175; - xfer += iprot->readListBegin(_etype175, _size172); - this->encodings.resize(_size172); - uint32_t _i176; - for (_i176 = 0; _i176 < _size172; ++_i176) - { - int32_t ecast177; - xfer += iprot->readI32(ecast177); - this->encodings[_i176] = static_cast<Encoding::type>(ecast177); - } - xfer += iprot->readListEnd(); - } - isset_encodings = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size178; - ::apache::thrift::protocol::TType _etype181; - xfer += iprot->readListBegin(_etype181, _size178); - this->path_in_schema.resize(_size178); - uint32_t _i182; - for (_i182 = 0; _i182 < _size178; ++_i182) - { - xfer += iprot->readString(this->path_in_schema[_i182]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast183; - xfer += iprot->readI32(ecast183); - this->codec = static_cast<CompressionCodec::type>(ecast183); - isset_codec = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_uncompressed_size); - isset_total_uncompressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - isset_total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype ==
::apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size184; - ::apache::thrift::protocol::TType _etype187; - xfer += iprot->readListBegin(_etype187, _size184); - this->key_value_metadata.resize(_size184); - uint32_t _i188; - for (_i188 = 0; _i188 < _size184; ++_i188) - { - xfer += this->key_value_metadata[_i188].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->data_page_offset); - isset_data_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->index_page_offset); - this->__isset.index_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->dictionary_page_offset); - this->__isset.dictionary_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->encoding_stats.clear(); - uint32_t _size189; - ::apache::thrift::protocol::TType _etype192; - xfer += iprot->readListBegin(_etype192, _size189); - this->encoding_stats.resize(_size189); - uint32_t _i193; - for (_i193 = 0; _i193 < _size189; ++_i193) - { - xfer += this->encoding_stats[_i193].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.encoding_stats = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 14: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->bloom_filter_offset); - this->__isset.bloom_filter_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 15: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->bloom_filter_length); - this->__isset.bloom_filter_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 16: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->size_statistics.read(iprot); - this->__isset.size_statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encodings) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_codec) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_uncompressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_compressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_data_page_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += 
oprot->writeStructBegin("ColumnMetaData"); - - xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast(this->type)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - std::vector ::const_iterator _iter194; - for (_iter194 = this->encodings.begin(); _iter194 != this->encodings.end(); ++_iter194) - { - xfer += oprot->writeI32(static_cast((*_iter194))); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter195; - for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195) - { - xfer += oprot->writeString((*_iter195)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast(this->codec)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_uncompressed_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter196; - for (_iter196 = this->key_value_metadata.begin(); _iter196 != this->key_value_metadata.end(); ++_iter196) - { - xfer += (*_iter196).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9); - xfer += oprot->writeI64(this->data_page_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.index_page_offset) { - xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10); - xfer += oprot->writeI64(this->index_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_offset) { - xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11); - xfer += oprot->writeI64(this->dictionary_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encoding_stats) { - xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); - std::vector ::const_iterator _iter197; - for (_iter197 = 
this->encoding_stats.begin(); _iter197 != this->encoding_stats.end(); ++_iter197) - { - xfer += (*_iter197).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.bloom_filter_offset) { - xfer += oprot->writeFieldBegin("bloom_filter_offset", ::apache::thrift::protocol::T_I64, 14); - xfer += oprot->writeI64(this->bloom_filter_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.bloom_filter_length) { - xfer += oprot->writeFieldBegin("bloom_filter_length", ::apache::thrift::protocol::T_I32, 15); - xfer += oprot->writeI32(this->bloom_filter_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.size_statistics) { - xfer += oprot->writeFieldBegin("size_statistics", ::apache::thrift::protocol::T_STRUCT, 16); - xfer += this->size_statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnMetaData &a, ColumnMetaData &b) { using ::std::swap; swap(a.type, b.type); @@ -6139,44 +3043,6 @@ std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj) } -uint32_t EncryptionWithFooterKey::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionWithFooterKey::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithFooterKey"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { using ::std::swap; (void) a; @@ -6223,97 +3089,6 @@ std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj) } -uint32_t EncryptionWithColumnKey::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_path_in_schema = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size206; - ::apache::thrift::protocol::TType _etype209; - xfer += iprot->readListBegin(_etype209, _size206); - this->path_in_schema.resize(_size206); - uint32_t _i210; - for (_i210 = 0; _i210 < _size206; ++_i210) - { - xfer += iprot->readString(this->path_in_schema[_i210]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = 
true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t EncryptionWithColumnKey::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithColumnKey"); - - xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size())); - std::vector<std::string>::const_iterator _iter211; - for (_iter211 = this->path_in_schema.begin(); _iter211 != this->path_in_schema.end(); ++_iter211) - { - xfer += oprot->writeString((*_iter211)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { using ::std::swap; swap(a.path_in_schema, b.path_in_schema); @@ -6372,75 +3147,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj) } -uint32_t ColumnCryptoMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnCryptoMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); - - if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnCryptoMetaData &a,
ColumnCryptoMetaData &b) { using ::std::swap; swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY); @@ -6533,168 +3239,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj) } -uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_file_offset = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->file_path); - this->__isset.file_path = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - isset_file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->meta_data.read(iprot); - this->__isset.meta_data = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset_index_offset); - this->__isset.offset_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->offset_index_length); - this->__isset.offset_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->column_index_offset); - this->__isset.column_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_index_length); - this->__isset.column_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->crypto_metadata.read(iprot); - this->__isset.crypto_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->encrypted_column_metadata); - this->__isset.encrypted_column_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_file_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnChunk::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnChunk"); - - if (this->__isset.file_path) { - xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->file_path); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.meta_data) { - xfer += oprot->writeFieldBegin("meta_data", 
::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->meta_data.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_offset) { - xfer += oprot->writeFieldBegin("offset_index_offset", ::apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->offset_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_length) { - xfer += oprot->writeFieldBegin("offset_index_length", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->offset_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_offset) { - xfer += oprot->writeFieldBegin("column_index_offset", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->column_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_length) { - xfer += oprot->writeFieldBegin("column_index_length", ::apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->column_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.crypto_metadata) { - xfer += oprot->writeFieldBegin("crypto_metadata", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->crypto_metadata.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encrypted_column_metadata) { - xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->encrypted_column_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnChunk &a, ColumnChunk &b) { using ::std::swap; swap(a.file_path, b.file_path); @@ -6817,186 +3361,6 @@ std::ostream& operator<<(std::ostream& out, const RowGroup& obj) } -uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_columns = false; - bool isset_total_byte_size = false; - bool isset_num_rows = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->columns.clear(); - uint32_t _size224; - ::apache::thrift::protocol::TType _etype227; - xfer += iprot->readListBegin(_etype227, _size224); - this->columns.resize(_size224); - uint32_t _i228; - for (_i228 = 0; _i228 < _size224; ++_i228) - { - xfer += this->columns[_i228].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_byte_size); - isset_total_byte_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->sorting_columns.clear(); - uint32_t _size229; - ::apache::thrift::protocol::TType _etype232; - xfer += iprot->readListBegin(_etype232, _size229); - this->sorting_columns.resize(_size229); - uint32_t _i233; - for (_i233 = 0; _i233 < 
_size229; ++_i233) - { - xfer += this->sorting_columns[_i233].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.sorting_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - this->__isset.file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - this->__isset.total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I16) { - xfer += iprot->readI16(this->ordinal); - this->__isset.ordinal = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_columns) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_byte_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("RowGroup"); - - xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->columns.size())); - std::vector<ColumnChunk>::const_iterator _iter234; - for (_iter234 = this->columns.begin(); _iter234 != this->columns.end(); ++_iter234) - { - xfer += (*_iter234).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->total_byte_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.sorting_columns) { - xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->sorting_columns.size())); - std::vector<SortingColumn>::const_iterator _iter235; - for (_iter235 = this->sorting_columns.begin(); _iter235 != this->sorting_columns.end(); ++_iter235) - { - xfer += (*_iter235).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.file_offset) { - xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.total_compressed_size) { - xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ordinal) { - xfer += oprot->writeFieldBegin("ordinal", ::apache::thrift::protocol::T_I16, 7); - xfer += oprot->writeI16(this->ordinal); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(RowGroup &a, RowGroup &b) { using ::std::swap; swap(a.columns, b.columns); @@ -7075,44
+3439,6 @@ std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) } -uint32_t TypeDefinedOrder::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TypeDefinedOrder::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TypeDefinedOrder"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { using ::std::swap; (void) a; @@ -7155,62 +3481,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj) } -uint32_t ColumnOrder::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TYPE_ORDER.read(iprot); - this->__isset.TYPE_ORDER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnOrder::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnOrder"); - - if (this->__isset.TYPE_ORDER) { - xfer += oprot->writeFieldBegin("TYPE_ORDER", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->TYPE_ORDER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnOrder &a, ColumnOrder &b) { using ::std::swap; swap(a.TYPE_ORDER, b.TYPE_ORDER); @@ -7265,94 +3535,6 @@ std::ostream& operator<<(std::ostream& out, const PageLocation& obj) } -uint32_t PageLocation::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_offset = false; - bool isset_compressed_page_size = false; - bool isset_first_row_index = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset); - isset_offset = true; - } else { - xfer += 
iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->first_row_index); - isset_first_row_index = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_first_row_index) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageLocation::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageLocation"); - - xfer += oprot->writeFieldBegin("offset", ::apache::thrift::protocol::T_I64, 1); - xfer += oprot->writeI64(this->offset); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("first_row_index", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->first_row_index); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(PageLocation &a, PageLocation &b) { using ::std::swap; swap(a.offset, b.offset); @@ -7411,117 +3593,6 @@ std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj) } -uint32_t OffsetIndex::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_page_locations = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->page_locations.clear(); - uint32_t _size252; - ::apache::thrift::protocol::TType _etype255; - xfer += iprot->readListBegin(_etype255, _size252); - this->page_locations.resize(_size252); - uint32_t _i256; - for (_i256 = 0; _i256 < _size252; ++_i256) - { - xfer += this->page_locations[_i256].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_page_locations = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->unencoded_byte_array_data_bytes.clear(); - uint32_t _size257; - ::apache::thrift::protocol::TType _etype260; - xfer += iprot->readListBegin(_etype260, _size257); - this->unencoded_byte_array_data_bytes.resize(_size257); - uint32_t _i261; - for (_i261 = 0; _i261 < _size257; ++_i261) - { - xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i261]); - } - xfer += iprot->readListEnd(); - } - this->__isset.unencoded_byte_array_data_bytes = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += 
iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_page_locations) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t OffsetIndex::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("OffsetIndex"); - - xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->page_locations.size())); - std::vector<PageLocation>::const_iterator _iter262; - for (_iter262 = this->page_locations.begin(); _iter262 != this->page_locations.end(); ++_iter262) - { - xfer += (*_iter262).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.unencoded_byte_array_data_bytes) { - xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->unencoded_byte_array_data_bytes.size())); - std::vector<int64_t>::const_iterator _iter263; - for (_iter263 = this->unencoded_byte_array_data_bytes.begin(); _iter263 != this->unencoded_byte_array_data_bytes.end(); ++_iter263) - { - xfer += oprot->writeI64((*_iter263)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(OffsetIndex &a, OffsetIndex &b) { using ::std::swap; swap(a.page_locations, b.page_locations); @@ -7601,270 +3672,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) } -uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_null_pages = false; - bool isset_min_values = false; - bool isset_max_values = false; - bool isset_boundary_order = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->null_pages.clear(); - uint32_t _size268; - ::apache::thrift::protocol::TType _etype271; - xfer += iprot->readListBegin(_etype271, _size268); - this->null_pages.resize(_size268); - uint32_t _i272; - for (_i272 = 0; _i272 < _size268; ++_i272) - { - xfer += iprot->readBool(this->null_pages[_i272]); - } - xfer += iprot->readListEnd(); - } - isset_null_pages = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->min_values.clear(); - uint32_t _size273; - ::apache::thrift::protocol::TType _etype276; - xfer += iprot->readListBegin(_etype276, _size273); - this->min_values.resize(_size273); - uint32_t _i277; - for (_i277 = 0; _i277 < _size273; ++_i277) - { - xfer += iprot->readBinary(this->min_values[_i277]); - } - xfer += iprot->readListEnd(); - } - isset_min_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->max_values.clear(); - uint32_t _size278; -
::apache::thrift::protocol::TType _etype281; - xfer += iprot->readListBegin(_etype281, _size278); - this->max_values.resize(_size278); - uint32_t _i282; - for (_i282 = 0; _i282 < _size278; ++_i282) - { - xfer += iprot->readBinary(this->max_values[_i282]); - } - xfer += iprot->readListEnd(); - } - isset_max_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast283; - xfer += iprot->readI32(ecast283); - this->boundary_order = static_cast<BoundaryOrder::type>(ecast283); - isset_boundary_order = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->null_counts.clear(); - uint32_t _size284; - ::apache::thrift::protocol::TType _etype287; - xfer += iprot->readListBegin(_etype287, _size284); - this->null_counts.resize(_size284); - uint32_t _i288; - for (_i288 = 0; _i288 < _size284; ++_i288) - { - xfer += iprot->readI64(this->null_counts[_i288]); - } - xfer += iprot->readListEnd(); - } - this->__isset.null_counts = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->repetition_level_histograms.clear(); - uint32_t _size289; - ::apache::thrift::protocol::TType _etype292; - xfer += iprot->readListBegin(_etype292, _size289); - this->repetition_level_histograms.resize(_size289); - uint32_t _i293; - for (_i293 = 0; _i293 < _size289; ++_i293) - { - xfer += iprot->readI64(this->repetition_level_histograms[_i293]); - } - xfer += iprot->readListEnd(); - } - this->__isset.repetition_level_histograms = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->definition_level_histograms.clear(); - uint32_t _size294; - ::apache::thrift::protocol::TType _etype297; - xfer += iprot->readListBegin(_etype297, _size294); - this->definition_level_histograms.resize(_size294); - uint32_t _i298; - for (_i298 = 0; _i298 < _size294; ++_i298) - { - xfer += iprot->readI64(this->definition_level_histograms[_i298]); - } - xfer += iprot->readListEnd(); - } - this->__isset.definition_level_histograms = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_null_pages) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_min_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_max_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_boundary_order) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnIndex"); - - xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast<uint32_t>(this->null_pages.size())); - std::vector<bool>::const_iterator _iter299; - for (_iter299 = this->null_pages.begin(); _iter299 != this->null_pages.end(); ++_iter299) - { - xfer += oprot->writeBool((*_iter299)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2); - { - xfer +=
oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->min_values.size())); - std::vector<std::string>::const_iterator _iter300; - for (_iter300 = this->min_values.begin(); _iter300 != this->min_values.end(); ++_iter300) - { - xfer += oprot->writeBinary((*_iter300)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->max_values.size())); - std::vector<std::string>::const_iterator _iter301; - for (_iter301 = this->max_values.begin(); _iter301 != this->max_values.end(); ++_iter301) - { - xfer += oprot->writeBinary((*_iter301)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("boundary_order", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast<int32_t>(this->boundary_order)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.null_counts) { - xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->null_counts.size())); - std::vector<int64_t>::const_iterator _iter302; - for (_iter302 = this->null_counts.begin(); _iter302 != this->null_counts.end(); ++_iter302) - { - xfer += oprot->writeI64((*_iter302)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_level_histograms) { - xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->repetition_level_histograms.size())); - std::vector<int64_t>::const_iterator _iter303; - for (_iter303 = this->repetition_level_histograms.begin(); _iter303 != this->repetition_level_histograms.end(); ++_iter303) - { - xfer += oprot->writeI64((*_iter303)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.definition_level_histograms) { - xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->definition_level_histograms.size())); - std::vector<int64_t>::const_iterator _iter304; - for (_iter304 = this->definition_level_histograms.begin(); _iter304 != this->definition_level_histograms.end(); ++_iter304) - { - xfer += oprot->writeI64((*_iter304)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnIndex &a, ColumnIndex &b) { using ::std::swap; swap(a.null_pages, b.null_pages); @@ -7958,88 +3765,6 @@ std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj) } -uint32_t AesGcmV1::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); -
this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmV1::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(AesGcmV1 &a, AesGcmV1 &b) { using ::std::swap; swap(a.aad_prefix, b.aad_prefix); @@ -8109,88 +3834,6 @@ std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj) } -uint32_t AesGcmCtrV1::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmCtrV1::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmCtrV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if 
(this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { using ::std::swap; swap(a.aad_prefix, b.aad_prefix); @@ -8255,75 +3898,6 @@ std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj) } -uint32_t EncryptionAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_V1.read(iprot); - this->__isset.AES_GCM_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_CTR_V1.read(iprot); - this->__isset.AES_GCM_CTR_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionAlgorithm"); - - if (this->__isset.AES_GCM_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_V1", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->AES_GCM_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.AES_GCM_CTR_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->AES_GCM_CTR_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { using ::std::swap; swap(a.AES_GCM_V1, b.AES_GCM_V1); @@ -8413,254 +3987,6 @@ std::ostream& operator<<(std::ostream& out, const FileMetaData& obj) } -uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_version = false; - bool isset_schema = false; - bool isset_num_rows = false; - bool isset_row_groups = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->version); - isset_version = true; - } else { - xfer += iprot->skip(ftype); - 
} - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->schema.clear(); - uint32_t _size321; - ::apache::thrift::protocol::TType _etype324; - xfer += iprot->readListBegin(_etype324, _size321); - this->schema.resize(_size321); - uint32_t _i325; - for (_i325 = 0; _i325 < _size321; ++_i325) - { - xfer += this->schema[_i325].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->row_groups.clear(); - uint32_t _size326; - ::apache::thrift::protocol::TType _etype329; - xfer += iprot->readListBegin(_etype329, _size326); - this->row_groups.resize(_size326); - uint32_t _i330; - for (_i330 = 0; _i330 < _size326; ++_i330) - { - xfer += this->row_groups[_i330].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_row_groups = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size331; - ::apache::thrift::protocol::TType _etype334; - xfer += iprot->readListBegin(_etype334, _size331); - this->key_value_metadata.resize(_size331); - uint32_t _i335; - for (_i335 = 0; _i335 < _size331; ++_i335) - { - xfer += this->key_value_metadata[_i335].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->created_by); - this->__isset.created_by = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->column_orders.clear(); - uint32_t _size336; - ::apache::thrift::protocol::TType _etype339; - xfer += iprot->readListBegin(_etype339, _size336); - this->column_orders.resize(_size336); - uint32_t _i340; - for (_i340 = 0; _i340 < _size336; ++_i340) - { - xfer += this->column_orders[_i340].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.column_orders = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - this->__isset.encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->footer_signing_key_metadata); - this->__isset.footer_signing_key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_version) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_row_groups) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileMetaData"); 
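// [Editor's note, not part of the generated diff] The removed write() below
// illustrates the Thrift struct wire protocol: every set field is emitted as a
// writeFieldBegin(name, type, id) / value / writeFieldEnd() triple -- required
// fields (version, schema, num_rows, row_groups) unconditionally, optional
// fields only when their __isset bit is set -- and writeFieldStop() terminates
// the struct. The templated replacement in parquet_types.tcc keeps this exact
// sequence; only the protocol type becomes a compile-time parameter.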
- - xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->version); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->schema.size())); - std::vector<SchemaElement>::const_iterator _iter341; - for (_iter341 = this->schema.begin(); _iter341 != this->schema.end(); ++_iter341) - { - xfer += (*_iter341).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->row_groups.size())); - std::vector<RowGroup>::const_iterator _iter342; - for (_iter342 = this->row_groups.begin(); _iter342 != this->row_groups.end(); ++_iter342) - { - xfer += (*_iter342).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size())); - std::vector<KeyValue>::const_iterator _iter343; - for (_iter343 = this->key_value_metadata.begin(); _iter343 != this->key_value_metadata.end(); ++_iter343) - { - xfer += (*_iter343).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.created_by) { - xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeString(this->created_by); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_orders) { - xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->column_orders.size())); - std::vector<ColumnOrder>::const_iterator _iter344; - for (_iter344 = this->column_orders.begin(); _iter344 != this->column_orders.end(); ++_iter344) - { - xfer += (*_iter344).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encryption_algorithm) { - xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.footer_signing_key_metadata) { - xfer += oprot->writeFieldBegin("footer_signing_key_metadata", ::apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->footer_signing_key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(FileMetaData &a, FileMetaData &b) { using ::std::swap; swap(a.version, b.version); @@ -8760,77 +4086,6 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj) } -uint32_t FileCryptoMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using
::apache::thrift::protocol::TProtocolException; - - bool isset_encryption_algorithm = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - isset_encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_encryption_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileCryptoMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileCryptoMetaData"); - - xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { using ::std::swap; swap(a.encryption_algorithm, b.encryption_algorithm); diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index 9dc6794c4030b..6cf85fe5e73cc 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -466,7 +466,7 @@ typedef struct _SizeStatistics__isset { * in this structure can help determine the number of nulls at a particular * nesting level and maximum length of lists). */ -class SizeStatistics : public virtual ::apache::thrift::TBase { +class SizeStatistics { public: SizeStatistics(const SizeStatistics&); @@ -546,8 +546,10 @@ class SizeStatistics : public virtual ::apache::thrift::TBase { bool operator < (const SizeStatistics & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -572,7 +574,7 @@ typedef struct _Statistics__isset { * Statistics per row group and per page * All fields are optional.
*/ -class Statistics : public virtual ::apache::thrift::TBase { +class Statistics { public: Statistics(const Statistics&); @@ -697,8 +699,10 @@ class Statistics : public virtual ::apache::thrift::TBase { bool operator < (const Statistics & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -711,7 +715,7 @@ std::ostream& operator<<(std::ostream& out, const Statistics& obj); /** * Empty structs to use as logical type annotations */ -class StringType : public virtual ::apache::thrift::TBase { +class StringType { public: StringType(const StringType&) noexcept; @@ -733,8 +737,10 @@ class StringType : public virtual ::apache::thrift::TBase { bool operator < (const StringType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -744,7 +750,7 @@ void swap(StringType &a, StringType &b); std::ostream& operator<<(std::ostream& out, const StringType& obj); -class UUIDType : public virtual ::apache::thrift::TBase { +class UUIDType { public: UUIDType(const UUIDType&) noexcept; @@ -766,8 +772,10 @@ class UUIDType : public virtual ::apache::thrift::TBase { bool operator < (const UUIDType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -777,7 +785,7 @@ void swap(UUIDType &a, UUIDType &b); std::ostream& operator<<(std::ostream& out, const UUIDType& obj); -class MapType : public virtual ::apache::thrift::TBase { +class MapType { public: MapType(const MapType&) noexcept; @@ -799,8 +807,10 @@ class MapType : public virtual ::apache::thrift::TBase { bool operator < (const MapType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -810,7 +820,7 @@ void swap(MapType &a, MapType &b); std::ostream& operator<<(std::ostream& out, const MapType& obj); -class ListType : public virtual ::apache::thrift::TBase { +class ListType { public: ListType(const ListType&) noexcept; @@ -832,8 +842,10 @@ class ListType : public virtual ::apache::thrift::TBase { bool operator < (const ListType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -843,7 +855,7 @@ void swap(ListType &a, ListType &b); std::ostream& operator<<(std::ostream& out, const ListType& obj); -class EnumType : public virtual
::apache::thrift::TBase { bool operator < (const EnumType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -876,7 +890,7 @@ void swap(EnumType &a, EnumType &b); std::ostream& operator<<(std::ostream& out, const EnumType& obj); -class DateType : public virtual ::apache::thrift::TBase { +class DateType { public: DateType(const DateType&) noexcept; @@ -898,8 +912,10 @@ class DateType : public virtual ::apache::thrift::TBase { bool operator < (const DateType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -909,7 +925,7 @@ void swap(DateType &a, DateType &b); std::ostream& operator<<(std::ostream& out, const DateType& obj); -class Float16Type : public virtual ::apache::thrift::TBase { +class Float16Type { public: Float16Type(const Float16Type&) noexcept; @@ -931,8 +947,10 @@ class Float16Type : public virtual ::apache::thrift::TBase { bool operator < (const Float16Type & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -949,7 +967,7 @@ std::ostream& operator<<(std::ostream& out, const Float16Type& obj); * null and the physical type can't be determined. This annotation signals * the case where the physical type was guessed from all null values.
*/ -class NullType : public virtual ::apache::thrift::TBase { +class NullType { public: NullType(const NullType&) noexcept; @@ -971,8 +989,10 @@ class NullType : public virtual ::apache::thrift::TBase { bool operator < (const NullType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -993,7 +1013,7 @@ std::ostream& operator<<(std::ostream& out, const NullType& obj); * * Allowed for physical types: INT32, INT64, FIXED, and BINARY */ -class DecimalType : public virtual ::apache::thrift::TBase { +class DecimalType { public: DecimalType(const DecimalType&) noexcept; @@ -1027,8 +1047,10 @@ class DecimalType : public virtual ::apache::thrift::TBase { bool operator < (const DecimalType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1041,7 +1063,7 @@ std::ostream& operator<<(std::ostream& out, const DecimalType& obj); /** * Time units for logical types */ -class MilliSeconds : public virtual ::apache::thrift::TBase { +class MilliSeconds { public: MilliSeconds(const MilliSeconds&) noexcept; @@ -1063,8 +1085,10 @@ class MilliSeconds : public virtual ::apache::thrift::TBase { bool operator < (const MilliSeconds & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1074,7 +1098,7 @@ void swap(MilliSeconds &a, MilliSeconds &b); std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj); -class MicroSeconds : public virtual ::apache::thrift::TBase { +class MicroSeconds { public: MicroSeconds(const MicroSeconds&) noexcept; @@ -1096,8 +1120,10 @@ class MicroSeconds : public virtual ::apache::thrift::TBase { bool operator < (const MicroSeconds & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1107,7 +1133,7 @@ void swap(MicroSeconds &a, MicroSeconds &b); std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj); -class NanoSeconds : public virtual ::apache::thrift::TBase { +class NanoSeconds { public: NanoSeconds(const NanoSeconds&) noexcept; @@ -1129,8 +1155,10 @@ class NanoSeconds : public virtual ::apache::thrift::TBase { bool operator < (const NanoSeconds & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1146,7 +1174,7 @@ typedef struct _TimeUnit__isset { bool NANOS :1; } _TimeUnit__isset; -class TimeUnit : public virtual ::apache::thrift::TBase { +class TimeUnit { public:
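// [Editor's note, not part of the generated header] The pattern repeated for
// every struct in this file is the same: drop the virtual TBase base class and
// replace the virtual read()/write() pair with templates over the concrete
// protocol type, so calls through a known protocol can be devirtualized and
// inlined. A minimal usage sketch under that assumption (TMemoryBuffer and
// TCompactProtocolT are standard Thrift 0.19 classes; error handling omitted):
//
//   #include <memory>
//   #include <string>
//   #include <thrift/transport/TBufferTransports.h>
//   #include <thrift/protocol/TCompactProtocol.h>
//
//   using apache::thrift::transport::TMemoryBuffer;
//   using apache::thrift::protocol::TCompactProtocolT;
//
//   std::shared_ptr<TMemoryBuffer> buffer(new TMemoryBuffer());
//   TCompactProtocolT<TMemoryBuffer> proto(buffer);
//   parquet::format::TimeUnit unit;
//   unit.__set_MILLIS(parquet::format::MilliSeconds());
//   unit.write(&proto);                       // static dispatch, no TBase vtable
//   std::string encoded = buffer->getBufferAsString();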
TimeUnit(const TimeUnit&) noexcept; @@ -1191,8 +1219,10 @@ class TimeUnit : public virtual ::apache::thrift::TBase { bool operator < (const TimeUnit & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1207,7 +1237,7 @@ std::ostream& operator<<(std::ostream& out, const TimeUnit& obj); * * Allowed for physical types: INT64 */ -class TimestampType : public virtual ::apache::thrift::TBase { +class TimestampType { public: TimestampType(const TimestampType&) noexcept; @@ -1240,8 +1270,10 @@ class TimestampType : public virtual ::apache::thrift::TBase { bool operator < (const TimestampType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1256,7 +1288,7 @@ std::ostream& operator<<(std::ostream& out, const TimestampType& obj); * * Allowed for physical types: INT32 (millis), INT64 (micros, nanos) */ -class TimeType : public virtual ::apache::thrift::TBase { +class TimeType { public: TimeType(const TimeType&) noexcept; @@ -1289,8 +1321,10 @@ class TimeType : public virtual ::apache::thrift::TBase { bool operator < (const TimeType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1307,7 +1341,7 @@ std::ostream& operator<<(std::ostream& out, const TimeType& obj); * * Allowed for physical types: INT32, INT64 */ -class IntType : public virtual ::apache::thrift::TBase { +class IntType { public: IntType(const IntType&) noexcept; @@ -1341,8 +1375,10 @@ class IntType : public virtual ::apache::thrift::TBase { bool operator < (const IntType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1357,7 +1393,7 @@ std::ostream& operator<<(std::ostream& out, const IntType& obj); * * Allowed for physical types: BINARY */ -class JsonType : public virtual ::apache::thrift::TBase { +class JsonType { public: JsonType(const JsonType&) noexcept; @@ -1379,8 +1415,10 @@ class JsonType : public virtual ::apache::thrift::TBase { bool operator < (const JsonType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1395,7 +1433,7 @@ std::ostream& operator<<(std::ostream& out, const JsonType& obj); * * Allowed for physical types: BINARY */ -class BsonType : public virtual ::apache::thrift::TBase
{ bool operator < (const BsonType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1452,7 +1492,7 @@ typedef struct _LogicalType__isset { * SchemaElement must also set the corresponding ConvertedType (if any) * from the following table. */ -class LogicalType : public virtual ::apache::thrift::TBase { +class LogicalType { public: LogicalType(const LogicalType&) noexcept; @@ -1574,8 +1614,10 @@ class LogicalType : public virtual ::apache::thrift::TBase { bool operator < (const LogicalType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1603,7 +1645,7 @@ typedef struct _SchemaElement__isset { * - if it is a primitive type (leaf) then type is defined and num_children is undefined * the nodes are listed in depth first traversal order. */ -class SchemaElement : public virtual ::apache::thrift::TBase { +class SchemaElement { public: SchemaElement(const SchemaElement&); @@ -1754,8 +1796,10 @@ class SchemaElement : public virtual ::apache::thrift::TBase { bool operator < (const SchemaElement & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1772,7 +1816,7 @@ typedef struct _DataPageHeader__isset { /** * Data page header */ -class DataPageHeader : public virtual ::apache::thrift::TBase { +class DataPageHeader { public: DataPageHeader(const DataPageHeader&); @@ -1848,8 +1892,10 @@ class DataPageHeader : public virtual ::apache::thrift::TBase { bool operator < (const DataPageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1859,7 +1905,7 @@ void swap(DataPageHeader &a, DataPageHeader &b); std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj); -class IndexPageHeader : public virtual ::apache::thrift::TBase { +class IndexPageHeader { public: IndexPageHeader(const IndexPageHeader&) noexcept; @@ -1881,8 +1927,10 @@ class IndexPageHeader : public virtual ::apache::thrift::TBase { bool operator < (const IndexPageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1902,7 +1950,7 @@ typedef struct _DictionaryPageHeader__isset { * can be placed in a column chunk.
* */ -class DictionaryPageHeader : public virtual ::apache::thrift::TBase { +class DictionaryPageHeader { public: DictionaryPageHeader(const DictionaryPageHeader&) noexcept; @@ -1957,8 +2005,10 @@ class DictionaryPageHeader : public virtual ::apache::thrift::TBase { bool operator < (const DictionaryPageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1979,7 +2029,7 @@ typedef struct _DataPageHeaderV2__isset { * The remaining section containing the data is compressed if is_compressed is true * */ -class DataPageHeaderV2 : public virtual ::apache::thrift::TBase { +class DataPageHeaderV2 { public: DataPageHeaderV2(const DataPageHeaderV2&); @@ -2085,8 +2135,10 @@ class DataPageHeaderV2 : public virtual ::apache::thrift::TBase { bool operator < (const DataPageHeaderV2 & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2099,7 +2151,7 @@ std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj); /** * Block-based algorithm type annotation. * */ -class SplitBlockAlgorithm : public virtual ::apache::thrift::TBase { +class SplitBlockAlgorithm { public: SplitBlockAlgorithm(const SplitBlockAlgorithm&) noexcept; @@ -2121,8 +2173,10 @@ class SplitBlockAlgorithm : public virtual ::apache::thrift::TBase { bool operator < (const SplitBlockAlgorithm & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2139,7 +2193,7 @@ typedef struct _BloomFilterAlgorithm__isset { /** * The algorithm used in Bloom filter. * */ -class BloomFilterAlgorithm : public virtual ::apache::thrift::TBase { +class BloomFilterAlgorithm { public: BloomFilterAlgorithm(const BloomFilterAlgorithm&) noexcept; @@ -2173,8 +2227,10 @@ class BloomFilterAlgorithm : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterAlgorithm & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2189,7 +2245,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj); * algorithm. It uses 64 bits version of xxHash.
* */ -class XxHash : public virtual ::apache::thrift::TBase { +class XxHash { public: XxHash(const XxHash&) noexcept; @@ -2211,8 +2267,10 @@ class XxHash : public virtual ::apache::thrift::TBase { bool operator < (const XxHash & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2231,7 +2289,7 @@ typedef struct _BloomFilterHash__isset { * using plain encoding. * */ -class BloomFilterHash : public virtual ::apache::thrift::TBase { +class BloomFilterHash { public: BloomFilterHash(const BloomFilterHash&) noexcept; @@ -2265,8 +2323,10 @@ class BloomFilterHash : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterHash & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2280,7 +2340,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj); * The compression used in the Bloom filter. * */ -class Uncompressed : public virtual ::apache::thrift::TBase { +class Uncompressed { public: Uncompressed(const Uncompressed&) noexcept; @@ -2302,8 +2362,10 @@ class Uncompressed : public virtual ::apache::thrift::TBase { bool operator < (const Uncompressed & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2317,7 +2379,7 @@ typedef struct _BloomFilterCompression__isset { bool UNCOMPRESSED :1; } _BloomFilterCompression__isset; -class BloomFilterCompression : public virtual ::apache::thrift::TBase { +class BloomFilterCompression { public: BloomFilterCompression(const BloomFilterCompression&) noexcept; @@ -2348,8 +2410,10 @@ class BloomFilterCompression : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterCompression & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2364,7 +2428,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj); * and followed by its bitset.
* */ -class BloomFilterHeader : public virtual ::apache::thrift::TBase { +class BloomFilterHeader { public: BloomFilterHeader(const BloomFilterHeader&) noexcept; @@ -2419,8 +2483,10 @@ class BloomFilterHeader : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2438,7 +2504,7 @@ typedef struct _PageHeader__isset { bool data_page_header_v2 :1; } _PageHeader__isset; -class PageHeader : public virtual ::apache::thrift::TBase { +class PageHeader { public: PageHeader(const PageHeader&); @@ -2545,8 +2611,10 @@ class PageHeader : public virtual ::apache::thrift::TBase { bool operator < (const PageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2563,7 +2631,7 @@ typedef struct _KeyValue__isset { /** * Wrapper struct to store key values */ -class KeyValue : public virtual ::apache::thrift::TBase { +class KeyValue { public: KeyValue(const KeyValue&); @@ -2601,8 +2669,10 @@ class KeyValue : public virtual ::apache::thrift::TBase { bool operator < (const KeyValue & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2615,7 +2685,7 @@ std::ostream& operator<<(std::ostream& out, const KeyValue& obj); /** * Wrapper struct to specify sort order */ -class SortingColumn : public virtual ::apache::thrift::TBase { +class SortingColumn { public: SortingColumn(const SortingColumn&) noexcept; @@ -2665,8 +2735,10 @@ class SortingColumn : public virtual ::apache::thrift::TBase { bool operator < (const SortingColumn & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2679,7 +2751,7 @@ std::ostream& operator<<(std::ostream& out, const SortingColumn& obj); /** * statistics of a given page type and encoding */ -class PageEncodingStats : public virtual ::apache::thrift::TBase { +class PageEncodingStats { public: PageEncodingStats(const PageEncodingStats&) noexcept; @@ -2732,8 +2804,10 @@ class PageEncodingStats : public virtual ::apache::thrift::TBase { bool operator < (const PageEncodingStats & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2757,7 +2831,7 @@ typedef struct _ColumnMetaData__isset { /** * Description for column metadata */ -class ColumnMetaData : public virtual ::apache::thrift::TBase { +class ColumnMetaData { public:
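// [Editor's note, not part of the generated header] Optional fields are
// tracked by the _ColumnMetaData__isset bitfield typedef'd above; write() only
// serializes a field whose bit is set, and read() flips the bit when the field
// arrives on the wire. A hedged sketch of how calling code typically
// populates one such optional field:
//
//   parquet::format::ColumnMetaData md;       // required fields set elsewhere
//   parquet::format::Statistics stats;
//   stats.__set_null_count(0);                // generated setter
//   md.__set_statistics(stats);               // also sets md.__isset.statistics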
ColumnMetaData(const ColumnMetaData&); @@ -2950,8 +3024,10 @@ class ColumnMetaData : public virtual ::apache::thrift::TBase { bool operator < (const ColumnMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2961,7 +3037,7 @@ void swap(ColumnMetaData &a, ColumnMetaData &b); std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj); -class EncryptionWithFooterKey : public virtual ::apache::thrift::TBase { +class EncryptionWithFooterKey { public: EncryptionWithFooterKey(const EncryptionWithFooterKey&) noexcept; @@ -2983,8 +3059,10 @@ class EncryptionWithFooterKey : public virtual ::apache::thrift::TBase { bool operator < (const EncryptionWithFooterKey & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2998,7 +3076,7 @@ typedef struct _EncryptionWithColumnKey__isset { bool key_metadata :1; } _EncryptionWithColumnKey__isset; -class EncryptionWithColumnKey : public virtual ::apache::thrift::TBase { +class EncryptionWithColumnKey { public: EncryptionWithColumnKey(const EncryptionWithColumnKey&); @@ -3041,8 +3119,10 @@ class EncryptionWithColumnKey : public virtual ::apache::thrift::TBase { bool operator < (const EncryptionWithColumnKey & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3057,7 +3137,7 @@ typedef struct _ColumnCryptoMetaData__isset { bool ENCRYPTION_WITH_COLUMN_KEY :1; } _ColumnCryptoMetaData__isset; -class ColumnCryptoMetaData : public virtual ::apache::thrift::TBase { +class ColumnCryptoMetaData { public: ColumnCryptoMetaData(const ColumnCryptoMetaData&); @@ -3095,8 +3175,10 @@ class ColumnCryptoMetaData : public virtual ::apache::thrift::TBase { bool operator < (const ColumnCryptoMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3117,7 +3199,7 @@ typedef struct _ColumnChunk__isset { bool encrypted_column_metadata :1; } _ColumnChunk__isset; -class ColumnChunk : public virtual ::apache::thrift::TBase { +class ColumnChunk { public: ColumnChunk(const ColumnChunk&); @@ -3241,8 +3323,10 @@ class ColumnChunk : public virtual ::apache::thrift::TBase { bool operator < (const ColumnChunk & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3259,7 +3343,7 @@ typedef struct _RowGroup__isset { bool ordinal :1; } _RowGroup__isset; -class RowGroup : public virtual
::apache::thrift::TBase { +class RowGroup { public: RowGroup(const RowGroup&); @@ -3357,8 +3441,10 @@ class RowGroup : public virtual ::apache::thrift::TBase { bool operator < (const RowGroup & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3371,7 +3457,7 @@ std::ostream& operator<<(std::ostream& out, const RowGroup& obj); /** * Empty struct to signal the order defined by the physical or logical type */ -class TypeDefinedOrder : public virtual ::apache::thrift::TBase { +class TypeDefinedOrder { public: TypeDefinedOrder(const TypeDefinedOrder&) noexcept; @@ -3393,8 +3479,10 @@ class TypeDefinedOrder : public virtual ::apache::thrift::TBase { bool operator < (const TypeDefinedOrder & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3420,7 +3508,7 @@ typedef struct _ColumnOrder__isset { * If the reader does not support the value of this union, min and max stats * for this column should be ignored. */ -class ColumnOrder : public virtual ::apache::thrift::TBase { +class ColumnOrder { public: ColumnOrder(const ColumnOrder&) noexcept; @@ -3501,8 +3589,10 @@ class ColumnOrder : public virtual ::apache::thrift::TBase { bool operator < (const ColumnOrder & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3512,7 +3602,7 @@ void swap(ColumnOrder &a, ColumnOrder &b); std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj); -class PageLocation : public virtual ::apache::thrift::TBase { +class PageLocation { public: PageLocation(const PageLocation&) noexcept; @@ -3563,8 +3653,10 @@ class PageLocation : public virtual ::apache::thrift::TBase { bool operator < (const PageLocation & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3578,7 +3670,7 @@ typedef struct _OffsetIndex__isset { bool unencoded_byte_array_data_bytes :1; } _OffsetIndex__isset; -class OffsetIndex : public virtual ::apache::thrift::TBase { +class OffsetIndex { public: OffsetIndex(const OffsetIndex&); @@ -3624,8 +3716,10 @@ class OffsetIndex : public virtual ::apache::thrift::TBase { bool operator < (const OffsetIndex & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3645,7 +3739,7 @@ typedef struct _ColumnIndex__isset { * Description for ColumnIndex.
* Each [i] refers to the page at OffsetIndex.page_locations[i] */ -class ColumnIndex : public virtual ::apache::thrift::TBase { +class ColumnIndex { public: ColumnIndex(const ColumnIndex&); @@ -3756,8 +3850,10 @@ class ColumnIndex : public virtual ::apache::thrift::TBase { bool operator < (const ColumnIndex & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3773,7 +3869,7 @@ typedef struct _AesGcmV1__isset { bool supply_aad_prefix :1; } _AesGcmV1__isset; -class AesGcmV1 : public virtual ::apache::thrift::TBase { +class AesGcmV1 { public: AesGcmV1(const AesGcmV1&); @@ -3831,8 +3927,10 @@ class AesGcmV1 : public virtual ::apache::thrift::TBase { bool operator < (const AesGcmV1 & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3848,7 +3946,7 @@ typedef struct _AesGcmCtrV1__isset { bool supply_aad_prefix :1; } _AesGcmCtrV1__isset; -class AesGcmCtrV1 : public virtual ::apache::thrift::TBase { +class AesGcmCtrV1 { public: AesGcmCtrV1(const AesGcmCtrV1&); @@ -3906,8 +4004,10 @@ class AesGcmCtrV1 : public virtual ::apache::thrift::TBase { bool operator < (const AesGcmCtrV1 & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3922,7 +4022,7 @@ typedef struct _EncryptionAlgorithm__isset { bool AES_GCM_CTR_V1 :1; } _EncryptionAlgorithm__isset; -class EncryptionAlgorithm : public virtual ::apache::thrift::TBase { +class EncryptionAlgorithm { public: EncryptionAlgorithm(const EncryptionAlgorithm&); @@ -3960,8 +4060,10 @@ class EncryptionAlgorithm : public virtual ::apache::thrift::TBase { bool operator < (const EncryptionAlgorithm & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3982,7 +4084,7 @@ typedef struct _FileMetaData__isset { /** * Description for file metadata */ -class FileMetaData : public virtual ::apache::thrift::TBase { +class FileMetaData { public: FileMetaData(const FileMetaData&); @@ -4116,8 +4218,10 @@ class FileMetaData : public virtual ::apache::thrift::TBase { bool operator < (const FileMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -4134,7 +4238,7 @@ typedef struct _FileCryptoMetaData__isset { /** * Crypto metadata for files with encrypted footer * */ -class FileCryptoMetaData : public virtual ::apache::thrift::TBase { +class FileCryptoMetaData { public:
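// [Editor's note, not part of the generated header] In FileCryptoMetaData,
// encryption_algorithm is the only required field -- the removed read() above
// throws TProtocolException::INVALID_DATA when it is absent -- while
// key_metadata remains optional behind __isset.key_metadata.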
@@ -4134,7 +4238,7 @@ typedef struct _FileCryptoMetaData__isset { /** * Crypto metadata for files with encrypted footer * */ -class FileCryptoMetaData : public virtual ::apache::thrift::TBase { +class FileCryptoMetaData { public: FileCryptoMetaData(const FileCryptoMetaData&); @@ -4180,8 +4284,10 @@ class FileCryptoMetaData : public virtual ::apache::thrift::TBase { bool operator < (const FileCryptoMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template <class Protocol_> + uint32_t read(Protocol_* iprot); + template <class Protocol_> + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -4192,4 +4298,6 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj); }} // namespace +#include "parquet_types.tcc" + #endif diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc new file mode 100644 index 0000000000000..ee02d7f0139fc --- /dev/null +++ b/cpp/src/generated/parquet_types.tcc @@ -0,0 +1,4867 @@ +/** + * Autogenerated by Thrift Compiler (0.19.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +#ifndef parquet_TYPES_TCC +#define parquet_TYPES_TCC + +#include "parquet_types.h" + +namespace parquet { namespace format { + +template <class Protocol_> +uint32_t SizeStatistics::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->unencoded_byte_array_data_bytes); + this->__isset.unencoded_byte_array_data_bytes = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->repetition_level_histogram.clear(); + uint32_t _size0; + ::apache::thrift::protocol::TType _etype3; + xfer += iprot->readListBegin(_etype3, _size0); + this->repetition_level_histogram.resize(_size0); + uint32_t _i4; + for (_i4 = 0; _i4 < _size0; ++_i4) + { + xfer += iprot->readI64(this->repetition_level_histogram[_i4]); + } + xfer += iprot->readListEnd(); + } + this->__isset.repetition_level_histogram = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->definition_level_histogram.clear(); + uint32_t _size5; + ::apache::thrift::protocol::TType _etype8; + xfer += iprot->readListBegin(_etype8, _size5); + this->definition_level_histogram.resize(_size5); + uint32_t _i9; + for (_i9 = 0; _i9 < _size5; ++_i9) + { + xfer += iprot->readI64(this->definition_level_histogram[_i9]); + } + xfer += iprot->readListEnd(); + } + this->__isset.definition_level_histogram = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template <class Protocol_> +uint32_t SizeStatistics::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("SizeStatistics"); + + if (this->__isset.unencoded_byte_array_data_bytes) { + xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_I64, 1); + xfer += oprot->writeI64(this->unencoded_byte_array_data_bytes); + xfer +=
oprot->writeFieldEnd(); + } + if (this->__isset.repetition_level_histogram) { + xfer += oprot->writeFieldBegin("repetition_level_histogram", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->repetition_level_histogram.size())); + std::vector<int64_t> ::const_iterator _iter10; + for (_iter10 = this->repetition_level_histogram.begin(); _iter10 != this->repetition_level_histogram.end(); ++_iter10) + { + xfer += oprot->writeI64((*_iter10)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.definition_level_histogram) { + xfer += oprot->writeFieldBegin("definition_level_histogram", ::apache::thrift::protocol::T_LIST, 3); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->definition_level_histogram.size())); + std::vector<int64_t> ::const_iterator _iter11; + for (_iter11 = this->definition_level_histogram.begin(); _iter11 != this->definition_level_histogram.end(); ++_iter11) + { + xfer += oprot->writeI64((*_iter11)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template <class Protocol_> +uint32_t Statistics::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->max); + this->__isset.max = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->min); + this->__isset.min = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->null_count); + this->__isset.null_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->distinct_count); + this->__isset.distinct_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->max_value); + this->__isset.max_value = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->min_value); + this->__isset.min_value = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->is_max_value_exact); + this->__isset.is_max_value_exact = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->is_min_value_exact); + this->__isset.is_min_value_exact = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template <class Protocol_> +uint32_t Statistics::write(Protocol_* oprot) const { + uint32_t
xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("Statistics"); + + if (this->__isset.max) { + xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->max); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.min) { + xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->min); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.null_count) { + xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->null_count); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.distinct_count) { + xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4); + xfer += oprot->writeI64(this->distinct_count); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.max_value) { + xfer += oprot->writeFieldBegin("max_value", ::apache::thrift::protocol::T_STRING, 5); + xfer += oprot->writeBinary(this->max_value); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.min_value) { + xfer += oprot->writeFieldBegin("min_value", ::apache::thrift::protocol::T_STRING, 6); + xfer += oprot->writeBinary(this->min_value); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.is_max_value_exact) { + xfer += oprot->writeFieldBegin("is_max_value_exact", ::apache::thrift::protocol::T_BOOL, 7); + xfer += oprot->writeBool(this->is_max_value_exact); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.is_min_value_exact) { + xfer += oprot->writeFieldBegin("is_min_value_exact", ::apache::thrift::protocol::T_BOOL, 8); + xfer += oprot->writeBool(this->is_min_value_exact); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t StringType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t StringType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("StringType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t UUIDType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t UUIDType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += 
oprot->writeStructBegin("UUIDType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t MapType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t MapType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("MapType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ListType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t ListType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ListType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t EnumType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t EnumType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EnumType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t DateType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t DateType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += 
oprot->writeStructBegin("DateType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t Float16Type::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t Float16Type::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("Float16Type"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t NullType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t NullType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("NullType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t DecimalType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_scale = false; + bool isset_precision = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->scale); + isset_scale = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->precision); + isset_precision = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_scale) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_precision) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t DecimalType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DecimalType"); + + xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->scale); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("precision", 
::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->precision); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t MilliSeconds::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t MilliSeconds::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("MilliSeconds"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t MicroSeconds::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t MicroSeconds::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("MicroSeconds"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t NanoSeconds::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t NanoSeconds::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("NanoSeconds"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t TimeUnit::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->MILLIS.read(iprot); + this->__isset.MILLIS = true; + } else { + xfer += 
iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->MICROS.read(iprot); + this->__isset.MICROS = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->NANOS.read(iprot); + this->__isset.NANOS = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t TimeUnit::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TimeUnit"); + + if (this->__isset.MILLIS) { + xfer += oprot->writeFieldBegin("MILLIS", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->MILLIS.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.MICROS) { + xfer += oprot->writeFieldBegin("MICROS", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->MICROS.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.NANOS) { + xfer += oprot->writeFieldBegin("NANOS", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += this->NANOS.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t TimestampType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_isAdjustedToUTC = false; + bool isset_unit = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->isAdjustedToUTC); + isset_isAdjustedToUTC = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->unit.read(iprot); + isset_unit = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_isAdjustedToUTC) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_unit) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t TimestampType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TimestampType"); + + xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); + xfer += oprot->writeBool(this->isAdjustedToUTC); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->unit.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t TimeType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += 
iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_isAdjustedToUTC = false; + bool isset_unit = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->isAdjustedToUTC); + isset_isAdjustedToUTC = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->unit.read(iprot); + isset_unit = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_isAdjustedToUTC) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_unit) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t TimeType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TimeType"); + + xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); + xfer += oprot->writeBool(this->isAdjustedToUTC); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->unit.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t IntType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_bitWidth = false; + bool isset_isSigned = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_BYTE) { + xfer += iprot->readByte(this->bitWidth); + isset_bitWidth = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->isSigned); + isset_isSigned = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_bitWidth) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_isSigned) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t IntType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("IntType"); + + xfer += oprot->writeFieldBegin("bitWidth", ::apache::thrift::protocol::T_BYTE, 1); + xfer += oprot->writeByte(this->bitWidth); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("isSigned", ::apache::thrift::protocol::T_BOOL, 2); + xfer += oprot->writeBool(this->isSigned); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t JsonType::read(Protocol_* iprot) { + + 
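TimeType, TimestampType, and IntType above show the required-field contract in these readers: each required field is tracked in a local isset_ flag and, after the field loop, a missing one raises TProtocolException(INVALID_DATA), so a struct is rejected whole rather than half-populated. A short sketch of how calling code might surface that, reusing the protocol object from the earlier sketch; the error handling is illustrative, not part of this patch.

    // Required fields (bitWidth, isSigned) absent from the wire mean the
    // whole struct is rejected as corrupt input.
    parquet::format::IntType int_type;
    try {
      int_type.read(&protocol);
    } catch (const apache::thrift::protocol::TProtocolException&) {
      // Treat the metadata as malformed; int_type contents are unspecified here.
    }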
::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t JsonType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("JsonType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BsonType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t BsonType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BsonType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t LogicalType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->STRING.read(iprot); + this->__isset.STRING = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->MAP.read(iprot); + this->__isset.MAP = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->LIST.read(iprot); + this->__isset.LIST = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->ENUM.read(iprot); + this->__isset.ENUM = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->DECIMAL.read(iprot); + this->__isset.DECIMAL = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->DATE.read(iprot); + this->__isset.DATE = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->TIME.read(iprot); + this->__isset.TIME = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + 
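LogicalType is the Thrift union tying the empty marker structs above together: the generated reader sets at most one __isset bit and silently skip()s unknown field ids (note the gap at id 9), which is what lets older readers pass over logical types added later. Populating TIMESTAMP(isAdjustedToUTC, MICROS) by hand, as a sketch using the field spellings from this header:

    parquet::format::TimeUnit unit;
    unit.__isset.MICROS = true;           // MicroSeconds is an empty marker struct
    parquet::format::TimestampType ts;
    ts.isAdjustedToUTC = true;
    ts.unit = unit;
    parquet::format::LogicalType logical;
    logical.TIMESTAMP = ts;
    logical.__isset.TIMESTAMP = true;     // exactly one union member set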
xfer += this->TIMESTAMP.read(iprot); + this->__isset.TIMESTAMP = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 10: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->INTEGER.read(iprot); + this->__isset.INTEGER = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 11: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->UNKNOWN.read(iprot); + this->__isset.UNKNOWN = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 12: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->JSON.read(iprot); + this->__isset.JSON = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 13: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->BSON.read(iprot); + this->__isset.BSON = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 14: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->UUID.read(iprot); + this->__isset.UUID = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 15: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->FLOAT16.read(iprot); + this->__isset.FLOAT16 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t LogicalType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("LogicalType"); + + if (this->__isset.STRING) { + xfer += oprot->writeFieldBegin("STRING", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->STRING.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.MAP) { + xfer += oprot->writeFieldBegin("MAP", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->MAP.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.LIST) { + xfer += oprot->writeFieldBegin("LIST", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += this->LIST.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ENUM) { + xfer += oprot->writeFieldBegin("ENUM", ::apache::thrift::protocol::T_STRUCT, 4); + xfer += this->ENUM.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.DECIMAL) { + xfer += oprot->writeFieldBegin("DECIMAL", ::apache::thrift::protocol::T_STRUCT, 5); + xfer += this->DECIMAL.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.DATE) { + xfer += oprot->writeFieldBegin("DATE", ::apache::thrift::protocol::T_STRUCT, 6); + xfer += this->DATE.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.TIME) { + xfer += oprot->writeFieldBegin("TIME", ::apache::thrift::protocol::T_STRUCT, 7); + xfer += this->TIME.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.TIMESTAMP) { + xfer += oprot->writeFieldBegin("TIMESTAMP", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->TIMESTAMP.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.INTEGER) { + xfer += oprot->writeFieldBegin("INTEGER", ::apache::thrift::protocol::T_STRUCT, 10); + xfer += this->INTEGER.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.UNKNOWN) { + xfer += oprot->writeFieldBegin("UNKNOWN", ::apache::thrift::protocol::T_STRUCT, 11); + xfer += this->UNKNOWN.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.JSON) { + xfer += oprot->writeFieldBegin("JSON", 
::apache::thrift::protocol::T_STRUCT, 12); + xfer += this->JSON.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.BSON) { + xfer += oprot->writeFieldBegin("BSON", ::apache::thrift::protocol::T_STRUCT, 13); + xfer += this->BSON.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.UUID) { + xfer += oprot->writeFieldBegin("UUID", ::apache::thrift::protocol::T_STRUCT, 14); + xfer += this->UUID.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.FLOAT16) { + xfer += oprot->writeFieldBegin("FLOAT16", ::apache::thrift::protocol::T_STRUCT, 15); + xfer += this->FLOAT16.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t SchemaElement::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_name = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast96; + xfer += iprot->readI32(ecast96); + this->type = static_cast(ecast96); + this->__isset.type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->type_length); + this->__isset.type_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast97; + xfer += iprot->readI32(ecast97); + this->repetition_type = static_cast(ecast97); + this->__isset.repetition_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->name); + isset_name = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_children); + this->__isset.num_children = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast98; + xfer += iprot->readI32(ecast98); + this->converted_type = static_cast(ecast98); + this->__isset.converted_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->scale); + this->__isset.scale = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->precision); + this->__isset.precision = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->field_id); + this->__isset.field_id = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 10: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->logicalType.read(iprot); + this->__isset.logicalType = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_name) + throw 
TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t SchemaElement::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("SchemaElement"); + + if (this->__isset.type) { + xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(static_cast(this->type)); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.type_length) { + xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->type_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.repetition_type) { + xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(static_cast(this->repetition_type)); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4); + xfer += oprot->writeString(this->name); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.num_children) { + xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->num_children); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.converted_type) { + xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6); + xfer += oprot->writeI32(static_cast(this->converted_type)); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.scale) { + xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7); + xfer += oprot->writeI32(this->scale); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.precision) { + xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8); + xfer += oprot->writeI32(this->precision); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.field_id) { + xfer += oprot->writeFieldBegin("field_id", ::apache::thrift::protocol::T_I32, 9); + xfer += oprot->writeI32(this->field_id); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.logicalType) { + xfer += oprot->writeFieldBegin("logicalType", ::apache::thrift::protocol::T_STRUCT, 10); + xfer += this->logicalType.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t DataPageHeader::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_num_values = false; + bool isset_encoding = false; + bool isset_definition_level_encoding = false; + bool isset_repetition_level_encoding = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast103; + xfer += iprot->readI32(ecast103); + this->encoding = static_cast(ecast103); + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t 
ecast104; + xfer += iprot->readI32(ecast104); + this->definition_level_encoding = static_cast(ecast104); + isset_definition_level_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast105; + xfer += iprot->readI32(ecast105); + this->repetition_level_encoding = static_cast(ecast105); + isset_repetition_level_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->statistics.read(iprot); + this->__isset.statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_definition_level_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_repetition_level_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t DataPageHeader::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DataPageHeader"); + + xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->num_values); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(static_cast(this->encoding)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(static_cast(this->definition_level_encoding)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(static_cast(this->repetition_level_encoding)); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.statistics) { + xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5); + xfer += this->statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t IndexPageHeader::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t IndexPageHeader::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("IndexPageHeader"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t DictionaryPageHeader::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + 
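DataPageHeader::write above shows the writer side of the same contract: required fields are emitted unconditionally, optional ones (statistics) only under their __isset guard. Serializing one to bytes, as a sketch; the buffer setup mirrors the first sketch and the PLAIN/RLE encoding choices are arbitrary:

    using apache::thrift::transport::TMemoryBuffer;
    auto buffer = std::make_shared<TMemoryBuffer>();   // growable output buffer
    apache::thrift::protocol::TCompactProtocolT<TMemoryBuffer> protocol(buffer);
    parquet::format::DataPageHeader header;
    header.num_values = 1000;
    header.encoding = parquet::format::Encoding::PLAIN;
    header.definition_level_encoding = parquet::format::Encoding::RLE;
    header.repetition_level_encoding = parquet::format::Encoding::RLE;
    uint32_t bytes = header.write(&protocol);
    std::string wire = buffer->getBufferAsString();    // the serialized page header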
::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_num_values = false; + bool isset_encoding = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast114; + xfer += iprot->readI32(ecast114); + this->encoding = static_cast(ecast114); + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->is_sorted); + this->__isset.is_sorted = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t DictionaryPageHeader::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DictionaryPageHeader"); + + xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->num_values); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(static_cast(this->encoding)); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.is_sorted) { + xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->is_sorted); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t DataPageHeaderV2::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_num_values = false; + bool isset_num_nulls = false; + bool isset_num_rows = false; + bool isset_encoding = false; + bool isset_definition_levels_byte_length = false; + bool isset_repetition_levels_byte_length = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_nulls); + isset_num_nulls = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast119; + 
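The ecastNNN locals here and throughout (ecast103, ecast114, ecast119, ...) are the stock Thrift enum idiom: enums have no wire type of their own, so they travel as i32 and are cast back to the generated enum without range checking. The same pattern distilled into a standalone helper; the ReadEncoding name is illustrative, not from the patch:

    // Decode an Encoding value the way the generated readers do: read an i32,
    // then static_cast -- out-of-range values survive the cast, so a consumer
    // that cares must validate the result itself.
    template <class Protocol_>
    uint32_t ReadEncoding(Protocol_* iprot, parquet::format::Encoding::type* out) {
      int32_t raw = 0;
      uint32_t xfer = iprot->readI32(raw);
      *out = static_cast<parquet::format::Encoding::type>(raw);
      return xfer;
    }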
xfer += iprot->readI32(ecast119); + this->encoding = static_cast(ecast119); + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->definition_levels_byte_length); + isset_definition_levels_byte_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->repetition_levels_byte_length); + isset_repetition_levels_byte_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->is_compressed); + this->__isset.is_compressed = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->statistics.read(iprot); + this->__isset.statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_nulls) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_definition_levels_byte_length) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_repetition_levels_byte_length) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t DataPageHeaderV2::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DataPageHeaderV2"); + + xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->num_values); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->num_nulls); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->num_rows); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(static_cast(this->encoding)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->definition_levels_byte_length); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6); + xfer += oprot->writeI32(this->repetition_levels_byte_length); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.is_compressed) { + xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7); + xfer += oprot->writeBool(this->is_compressed); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.statistics) { + xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t 
SplitBlockAlgorithm::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t SplitBlockAlgorithm::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("SplitBlockAlgorithm"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BloomFilterAlgorithm::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->BLOCK.read(iprot); + this->__isset.BLOCK = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t BloomFilterAlgorithm::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BloomFilterAlgorithm"); + + if (this->__isset.BLOCK) { + xfer += oprot->writeFieldBegin("BLOCK", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->BLOCK.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t XxHash::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t XxHash::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("XxHash"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BloomFilterHash::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == 
::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->XXHASH.read(iprot); + this->__isset.XXHASH = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t BloomFilterHash::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BloomFilterHash"); + + if (this->__isset.XXHASH) { + xfer += oprot->writeFieldBegin("XXHASH", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->XXHASH.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t Uncompressed::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t Uncompressed::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("Uncompressed"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BloomFilterCompression::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->UNCOMPRESSED.read(iprot); + this->__isset.UNCOMPRESSED = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t BloomFilterCompression::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BloomFilterCompression"); + + if (this->__isset.UNCOMPRESSED) { + xfer += oprot->writeFieldBegin("UNCOMPRESSED", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->UNCOMPRESSED.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BloomFilterHeader::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_numBytes = false; + 
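BloomFilterHeader composes three of the single-member unions defined above (algorithm, hash, compression), each a union of empty structs: effectively open enums where selecting a variant is just setting its __isset bit. Building the one combination currently defined, as a sketch with protocol as in the first sketch:

    parquet::format::BloomFilterHeader bf;
    bf.numBytes = 1024;                            // bitset size in bytes
    bf.algorithm.__isset.BLOCK = true;             // SplitBlockAlgorithm
    bf.hash.__isset.XXHASH = true;                 // XxHash
    bf.compression.__isset.UNCOMPRESSED = true;    // Uncompressed
    uint32_t n = bf.write(&protocol);              // all four fields are required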
bool isset_algorithm = false; + bool isset_hash = false; + bool isset_compression = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->numBytes); + isset_numBytes = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->algorithm.read(iprot); + isset_algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->hash.read(iprot); + isset_hash = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->compression.read(iprot); + isset_compression = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_numBytes) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_algorithm) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_hash) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_compression) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t BloomFilterHeader::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BloomFilterHeader"); + + xfer += oprot->writeFieldBegin("numBytes", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->numBytes); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("algorithm", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->algorithm.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("hash", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += this->hash.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("compression", ::apache::thrift::protocol::T_STRUCT, 4); + xfer += this->compression.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t PageHeader::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_type = false; + bool isset_uncompressed_page_size = false; + bool isset_compressed_page_size = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast152; + xfer += iprot->readI32(ecast152); + this->type = static_cast(ecast152); + isset_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->uncompressed_page_size); + isset_uncompressed_page_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += 
iprot->readI32(this->compressed_page_size); + isset_compressed_page_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->crc); + this->__isset.crc = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->data_page_header.read(iprot); + this->__isset.data_page_header = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->index_page_header.read(iprot); + this->__isset.index_page_header = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->dictionary_page_header.read(iprot); + this->__isset.dictionary_page_header = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->data_page_header_v2.read(iprot); + this->__isset.data_page_header_v2 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_type) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_uncompressed_page_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_compressed_page_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t PageHeader::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("PageHeader"); + + xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(static_cast(this->type)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->uncompressed_page_size); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->compressed_page_size); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.crc) { + xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(this->crc); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.data_page_header) { + xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5); + xfer += this->data_page_header.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.index_page_header) { + xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6); + xfer += this->index_page_header.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.dictionary_page_header) { + xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7); + xfer += this->dictionary_page_header.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.data_page_header_v2) { + xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->data_page_header_v2.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t 
KeyValue::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_key = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->key); + isset_key = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->value); + this->__isset.value = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_key) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t KeyValue::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("KeyValue"); + + xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->key); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.value) { + xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeString(this->value); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t SortingColumn::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_column_idx = false; + bool isset_descending = false; + bool isset_nulls_first = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->column_idx); + isset_column_idx = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->descending); + isset_descending = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->nulls_first); + isset_nulls_first = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_column_idx) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_descending) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_nulls_first) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t SortingColumn::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("SortingColumn"); + + xfer += 
oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->column_idx); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2); + xfer += oprot->writeBool(this->descending); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->nulls_first); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t PageEncodingStats::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_page_type = false; + bool isset_encoding = false; + bool isset_count = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast165; + xfer += iprot->readI32(ecast165); + this->page_type = static_cast(ecast165); + isset_page_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast166; + xfer += iprot->readI32(ecast166); + this->encoding = static_cast(ecast166); + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->count); + isset_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_page_type) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_count) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t PageEncodingStats::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("PageEncodingStats"); + + xfer += oprot->writeFieldBegin("page_type", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(static_cast(this->page_type)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(static_cast(this->encoding)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("count", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->count); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_type = false; + bool isset_encodings = false; + bool isset_path_in_schema = false; + bool isset_codec = false; + bool isset_num_values = false; + 
bool isset_total_uncompressed_size = false;
+  bool isset_total_compressed_size = false;
+  bool isset_data_page_offset = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast171;
+          xfer += iprot->readI32(ecast171);
+          this->type = static_cast<Type::type>(ecast171);
+          isset_type = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->encodings.clear();
+            uint32_t _size172;
+            ::apache::thrift::protocol::TType _etype175;
+            xfer += iprot->readListBegin(_etype175, _size172);
+            this->encodings.resize(_size172);
+            uint32_t _i176;
+            for (_i176 = 0; _i176 < _size172; ++_i176)
+            {
+              int32_t ecast177;
+              xfer += iprot->readI32(ecast177);
+              this->encodings[_i176] = static_cast<Encoding::type>(ecast177);
+            }
+            xfer += iprot->readListEnd();
+          }
+          isset_encodings = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->path_in_schema.clear();
+            uint32_t _size178;
+            ::apache::thrift::protocol::TType _etype181;
+            xfer += iprot->readListBegin(_etype181, _size178);
+            this->path_in_schema.resize(_size178);
+            uint32_t _i182;
+            for (_i182 = 0; _i182 < _size178; ++_i182)
+            {
+              xfer += iprot->readString(this->path_in_schema[_i182]);
+            }
+            xfer += iprot->readListEnd();
+          }
+          isset_path_in_schema = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast183;
+          xfer += iprot->readI32(ecast183);
+          this->codec = static_cast<CompressionCodec::type>(ecast183);
+          isset_codec = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->num_values);
+          isset_num_values = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->total_uncompressed_size);
+          isset_total_uncompressed_size = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 7:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->total_compressed_size);
+          isset_total_compressed_size = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 8:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->key_value_metadata.clear();
+            uint32_t _size184;
+            ::apache::thrift::protocol::TType _etype187;
+            xfer += iprot->readListBegin(_etype187, _size184);
+            this->key_value_metadata.resize(_size184);
+            uint32_t _i188;
+            for (_i188 = 0; _i188 < _size184; ++_i188)
+            {
+              xfer += this->key_value_metadata[_i188].read(iprot);
+            }
+            xfer += iprot->readListEnd();
+          }
+          this->__isset.key_value_metadata = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 9:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->data_page_offset);
+          isset_data_page_offset = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 10:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->index_page_offset);
+          this->__isset.index_page_offset = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 11:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->dictionary_page_offset);
+          this->__isset.dictionary_page_offset = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 12:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->statistics.read(iprot);
+          this->__isset.statistics = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 13:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->encoding_stats.clear();
+            uint32_t _size189;
+            ::apache::thrift::protocol::TType _etype192;
+            xfer += iprot->readListBegin(_etype192, _size189);
+            this->encoding_stats.resize(_size189);
+            uint32_t _i193;
+            for (_i193 = 0; _i193 < _size189; ++_i193)
+            {
+              xfer += this->encoding_stats[_i193].read(iprot);
+            }
+            xfer += iprot->readListEnd();
+          }
+          this->__isset.encoding_stats = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 14:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->bloom_filter_offset);
+          this->__isset.bloom_filter_offset = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 15:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->bloom_filter_length);
+          this->__isset.bloom_filter_length = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 16:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->size_statistics.read(iprot);
+          this->__isset.size_statistics = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_type)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_encodings)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_path_in_schema)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_codec)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_num_values)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_total_uncompressed_size)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_total_compressed_size)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_data_page_offset)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t ColumnMetaData::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("ColumnMetaData");
+
+  xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->type));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2);
+  {
+    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast<uint32_t>(this->encodings.size()));
+    std::vector<Encoding::type> ::const_iterator _iter194;
+    for (_iter194 = this->encodings.begin(); _iter194 != this->encodings.end(); ++_iter194)
+    {
+      xfer += oprot->writeI32(static_cast<int32_t>((*_iter194)));
+    }
+    xfer += oprot->writeListEnd();
+  }
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3);
+  {
+    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
+    std::vector<std::string> ::const_iterator _iter195;
+    for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195)
+    {
+      xfer += oprot->writeString((*_iter195));
+    }
+    xfer += oprot->writeListEnd();
+  }
+  xfer += oprot->writeFieldEnd();
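+  // [Editor's note, not generated code] A minimal sketch of how these
+  // templated write() methods are typically driven: bind a compact
+  // protocol to an in-memory transport and serialize the struct. The
+  // helper name SerializeColumnMetaData is an assumption for
+  // illustration; the Thrift types and calls are the library's own.
+  //
+  //   #include <thrift/protocol/TCompactProtocol.h>
+  //   #include <thrift/transport/TBufferTransports.h>
+  //
+  //   inline std::string SerializeColumnMetaData(const ColumnMetaData& md) {
+  //     using ::apache::thrift::protocol::TCompactProtocolT;
+  //     using ::apache::thrift::transport::TMemoryBuffer;
+  //     auto buffer = std::make_shared<TMemoryBuffer>();
+  //     TCompactProtocolT<TMemoryBuffer> protocol(buffer);
+  //     md.write(&protocol);  // dispatches to the templated write() above
+  //     return buffer->getBufferAsString();
+  //   }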
+
+  xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->codec));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5);
+  xfer += oprot->writeI64(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6);
+  xfer += oprot->writeI64(this->total_uncompressed_size);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7);
+  xfer += oprot->writeI64(this->total_compressed_size);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.key_value_metadata) {
+    xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8);
+    {
+      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
+      std::vector<KeyValue> ::const_iterator _iter196;
+      for (_iter196 = this->key_value_metadata.begin(); _iter196 != this->key_value_metadata.end(); ++_iter196)
+      {
+        xfer += (*_iter196).write(oprot);
+      }
+      xfer += oprot->writeListEnd();
+    }
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9);
+  xfer += oprot->writeI64(this->data_page_offset);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.index_page_offset) {
+    xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10);
+    xfer += oprot->writeI64(this->index_page_offset);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.dictionary_page_offset) {
+    xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11);
+    xfer += oprot->writeI64(this->dictionary_page_offset);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.statistics) {
+    xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12);
+    xfer += this->statistics.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.encoding_stats) {
+    xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13);
+    {
+      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->encoding_stats.size()));
+      std::vector<PageEncodingStats> ::const_iterator _iter197;
+      for (_iter197 = this->encoding_stats.begin(); _iter197 != this->encoding_stats.end(); ++_iter197)
+      {
+        xfer += (*_iter197).write(oprot);
+      }
+      xfer += oprot->writeListEnd();
+    }
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.bloom_filter_offset) {
+    xfer += oprot->writeFieldBegin("bloom_filter_offset", ::apache::thrift::protocol::T_I64, 14);
+    xfer += oprot->writeI64(this->bloom_filter_offset);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.bloom_filter_length) {
+    xfer += oprot->writeFieldBegin("bloom_filter_length", ::apache::thrift::protocol::T_I32, 15);
+    xfer += oprot->writeI32(this->bloom_filter_length);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.size_statistics) {
+    xfer += oprot->writeFieldBegin("size_statistics", ::apache::thrift::protocol::T_STRUCT, 16);
+    xfer += this->size_statistics.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t EncryptionWithFooterKey::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
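+  // [Editor's note, not generated code] EncryptionWithFooterKey -- like
+  // SplitBlockAlgorithm, XxHash and Uncompressed earlier in this file --
+  // is an empty struct that acts purely as a tag inside a Thrift union,
+  // so its read() only skips fields until T_STOP. A hedged sketch of the
+  // intended use through the generated ColumnCryptoMetaData wrapper
+  // (assuming the standard Thrift-generated __set_* setter):
+  //
+  //   ColumnCryptoMetaData crypto;
+  //   crypto.__set_ENCRYPTION_WITH_FOOTER_KEY(EncryptionWithFooterKey());
+  //   // write() then emits only field id 1; readers pick the union arm
+  //   // from whichever __isset flag the read() loop set.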
+ ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t EncryptionWithFooterKey::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionWithFooterKey"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t EncryptionWithColumnKey::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_path_in_schema = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->path_in_schema.clear(); + uint32_t _size206; + ::apache::thrift::protocol::TType _etype209; + xfer += iprot->readListBegin(_etype209, _size206); + this->path_in_schema.resize(_size206); + uint32_t _i210; + for (_i210 = 0; _i210 < _size206; ++_i210) + { + xfer += iprot->readString(this->path_in_schema[_i210]); + } + xfer += iprot->readListEnd(); + } + isset_path_in_schema = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->key_metadata); + this->__isset.key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_path_in_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t EncryptionWithColumnKey::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionWithColumnKey"); + + xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); + std::vector ::const_iterator _iter211; + for (_iter211 = this->path_in_schema.begin(); _iter211 != this->path_in_schema.end(); ++_iter211) + { + xfer += oprot->writeString((*_iter211)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_metadata) { + xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnCryptoMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using 
::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot); + this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot); + this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t ColumnCryptoMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); + + if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { + xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { + xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnChunk::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_file_offset = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->file_path); + this->__isset.file_path = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->file_offset); + isset_file_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->meta_data.read(iprot); + this->__isset.meta_data = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->offset_index_offset); + this->__isset.offset_index_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->offset_index_length); + this->__isset.offset_index_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->column_index_offset); + this->__isset.column_index_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->column_index_length); + this->__isset.column_index_length = true; + } else { + xfer += 
iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->crypto_metadata.read(iprot); + this->__isset.crypto_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->encrypted_column_metadata); + this->__isset.encrypted_column_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_file_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t ColumnChunk::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnChunk"); + + if (this->__isset.file_path) { + xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->file_path); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2); + xfer += oprot->writeI64(this->file_offset); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.meta_data) { + xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += this->meta_data.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.offset_index_offset) { + xfer += oprot->writeFieldBegin("offset_index_offset", ::apache::thrift::protocol::T_I64, 4); + xfer += oprot->writeI64(this->offset_index_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.offset_index_length) { + xfer += oprot->writeFieldBegin("offset_index_length", ::apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->offset_index_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_index_offset) { + xfer += oprot->writeFieldBegin("column_index_offset", ::apache::thrift::protocol::T_I64, 6); + xfer += oprot->writeI64(this->column_index_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_index_length) { + xfer += oprot->writeFieldBegin("column_index_length", ::apache::thrift::protocol::T_I32, 7); + xfer += oprot->writeI32(this->column_index_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.crypto_metadata) { + xfer += oprot->writeFieldBegin("crypto_metadata", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->crypto_metadata.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encrypted_column_metadata) { + xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::apache::thrift::protocol::T_STRING, 9); + xfer += oprot->writeBinary(this->encrypted_column_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t RowGroup::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_columns = false; + bool isset_total_byte_size = false; + bool isset_num_rows = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + 
{ + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->columns.clear(); + uint32_t _size224; + ::apache::thrift::protocol::TType _etype227; + xfer += iprot->readListBegin(_etype227, _size224); + this->columns.resize(_size224); + uint32_t _i228; + for (_i228 = 0; _i228 < _size224; ++_i228) + { + xfer += this->columns[_i228].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_columns = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_byte_size); + isset_total_byte_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->sorting_columns.clear(); + uint32_t _size229; + ::apache::thrift::protocol::TType _etype232; + xfer += iprot->readListBegin(_etype232, _size229); + this->sorting_columns.resize(_size229); + uint32_t _i233; + for (_i233 = 0; _i233 < _size229; ++_i233) + { + xfer += this->sorting_columns[_i233].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.sorting_columns = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->file_offset); + this->__isset.file_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_compressed_size); + this->__isset.total_compressed_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_I16) { + xfer += iprot->readI16(this->ordinal); + this->__isset.ordinal = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_columns) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_total_byte_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t RowGroup::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("RowGroup"); + + xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); + std::vector ::const_iterator _iter234; + for (_iter234 = this->columns.begin(); _iter234 != this->columns.end(); ++_iter234) + { + xfer += (*_iter234).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2); + xfer += oprot->writeI64(this->total_byte_size); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->num_rows); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.sorting_columns) { + xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); + { + xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); + std::vector ::const_iterator _iter235; + for (_iter235 = this->sorting_columns.begin(); _iter235 != this->sorting_columns.end(); ++_iter235) + { + xfer += (*_iter235).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.file_offset) { + xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 5); + xfer += oprot->writeI64(this->file_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.total_compressed_size) { + xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 6); + xfer += oprot->writeI64(this->total_compressed_size); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ordinal) { + xfer += oprot->writeFieldBegin("ordinal", ::apache::thrift::protocol::T_I16, 7); + xfer += oprot->writeI16(this->ordinal); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t TypeDefinedOrder::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t TypeDefinedOrder::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TypeDefinedOrder"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnOrder::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->TYPE_ORDER.read(iprot); + this->__isset.TYPE_ORDER = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t ColumnOrder::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnOrder"); + + if (this->__isset.TYPE_ORDER) { + xfer += oprot->writeFieldBegin("TYPE_ORDER", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->TYPE_ORDER.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t PageLocation::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; 
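+  // [Editor's note, not generated code] PageLocation declares offset,
+  // compressed_page_size and first_row_index as required, so read()
+  // tracks one isset_* flag per field and throws
+  // TProtocolException(TProtocolException::INVALID_DATA) if any of them
+  // is still unset when T_STOP is reached. Hypothetical usage, assuming
+  // a protocol `iprot` positioned at a serialized PageLocation:
+  //
+  //   PageLocation loc;
+  //   loc.read(iprot);  // throws INVALID_DATA if a required field is absent
+  //   int64_t page_start = loc.offset;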
+ int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_offset = false; + bool isset_compressed_page_size = false; + bool isset_first_row_index = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->offset); + isset_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->compressed_page_size); + isset_compressed_page_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->first_row_index); + isset_first_row_index = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_compressed_page_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_first_row_index) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t PageLocation::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("PageLocation"); + + xfer += oprot->writeFieldBegin("offset", ::apache::thrift::protocol::T_I64, 1); + xfer += oprot->writeI64(this->offset); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->compressed_page_size); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("first_row_index", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->first_row_index); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t OffsetIndex::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_page_locations = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->page_locations.clear(); + uint32_t _size252; + ::apache::thrift::protocol::TType _etype255; + xfer += iprot->readListBegin(_etype255, _size252); + this->page_locations.resize(_size252); + uint32_t _i256; + for (_i256 = 0; _i256 < _size252; ++_i256) + { + xfer += this->page_locations[_i256].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_page_locations = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->unencoded_byte_array_data_bytes.clear(); + uint32_t _size257; + ::apache::thrift::protocol::TType _etype260; + xfer += iprot->readListBegin(_etype260, _size257); + this->unencoded_byte_array_data_bytes.resize(_size257); + uint32_t 
_i261; + for (_i261 = 0; _i261 < _size257; ++_i261) + { + xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i261]); + } + xfer += iprot->readListEnd(); + } + this->__isset.unencoded_byte_array_data_bytes = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_page_locations) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t OffsetIndex::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("OffsetIndex"); + + xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); + std::vector ::const_iterator _iter262; + for (_iter262 = this->page_locations.begin(); _iter262 != this->page_locations.end(); ++_iter262) + { + xfer += (*_iter262).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + if (this->__isset.unencoded_byte_array_data_bytes) { + xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->unencoded_byte_array_data_bytes.size())); + std::vector ::const_iterator _iter263; + for (_iter263 = this->unencoded_byte_array_data_bytes.begin(); _iter263 != this->unencoded_byte_array_data_bytes.end(); ++_iter263) + { + xfer += oprot->writeI64((*_iter263)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnIndex::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_null_pages = false; + bool isset_min_values = false; + bool isset_max_values = false; + bool isset_boundary_order = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->null_pages.clear(); + uint32_t _size268; + ::apache::thrift::protocol::TType _etype271; + xfer += iprot->readListBegin(_etype271, _size268); + this->null_pages.resize(_size268); + uint32_t _i272; + for (_i272 = 0; _i272 < _size268; ++_i272) + { + xfer += iprot->readBool(this->null_pages[_i272]); + } + xfer += iprot->readListEnd(); + } + isset_null_pages = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->min_values.clear(); + uint32_t _size273; + ::apache::thrift::protocol::TType _etype276; + xfer += iprot->readListBegin(_etype276, _size273); + this->min_values.resize(_size273); + uint32_t _i277; + for (_i277 = 0; _i277 < _size273; ++_i277) + { + xfer += iprot->readBinary(this->min_values[_i277]); + } + xfer += iprot->readListEnd(); + } + isset_min_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_LIST) { + 
{ + this->max_values.clear(); + uint32_t _size278; + ::apache::thrift::protocol::TType _etype281; + xfer += iprot->readListBegin(_etype281, _size278); + this->max_values.resize(_size278); + uint32_t _i282; + for (_i282 = 0; _i282 < _size278; ++_i282) + { + xfer += iprot->readBinary(this->max_values[_i282]); + } + xfer += iprot->readListEnd(); + } + isset_max_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast283; + xfer += iprot->readI32(ecast283); + this->boundary_order = static_cast(ecast283); + isset_boundary_order = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->null_counts.clear(); + uint32_t _size284; + ::apache::thrift::protocol::TType _etype287; + xfer += iprot->readListBegin(_etype287, _size284); + this->null_counts.resize(_size284); + uint32_t _i288; + for (_i288 = 0; _i288 < _size284; ++_i288) + { + xfer += iprot->readI64(this->null_counts[_i288]); + } + xfer += iprot->readListEnd(); + } + this->__isset.null_counts = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->repetition_level_histograms.clear(); + uint32_t _size289; + ::apache::thrift::protocol::TType _etype292; + xfer += iprot->readListBegin(_etype292, _size289); + this->repetition_level_histograms.resize(_size289); + uint32_t _i293; + for (_i293 = 0; _i293 < _size289; ++_i293) + { + xfer += iprot->readI64(this->repetition_level_histograms[_i293]); + } + xfer += iprot->readListEnd(); + } + this->__isset.repetition_level_histograms = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->definition_level_histograms.clear(); + uint32_t _size294; + ::apache::thrift::protocol::TType _etype297; + xfer += iprot->readListBegin(_etype297, _size294); + this->definition_level_histograms.resize(_size294); + uint32_t _i298; + for (_i298 = 0; _i298 < _size294; ++_i298) + { + xfer += iprot->readI64(this->definition_level_histograms[_i298]); + } + xfer += iprot->readListEnd(); + } + this->__isset.definition_level_histograms = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_null_pages) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_min_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_max_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_boundary_order) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t ColumnIndex::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnIndex"); + + xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); + std::vector ::const_iterator _iter299; + for (_iter299 = this->null_pages.begin(); _iter299 != this->null_pages.end(); ++_iter299) + { + xfer += oprot->writeBool((*_iter299)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("min_values", 
::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); + std::vector ::const_iterator _iter300; + for (_iter300 = this->min_values.begin(); _iter300 != this->min_values.end(); ++_iter300) + { + xfer += oprot->writeBinary((*_iter300)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); + std::vector ::const_iterator _iter301; + for (_iter301 = this->max_values.begin(); _iter301 != this->max_values.end(); ++_iter301) + { + xfer += oprot->writeBinary((*_iter301)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("boundary_order", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(static_cast(this->boundary_order)); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.null_counts) { + xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); + std::vector ::const_iterator _iter302; + for (_iter302 = this->null_counts.begin(); _iter302 != this->null_counts.end(); ++_iter302) + { + xfer += oprot->writeI64((*_iter302)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.repetition_level_histograms) { + xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histograms.size())); + std::vector ::const_iterator _iter303; + for (_iter303 = this->repetition_level_histograms.begin(); _iter303 != this->repetition_level_histograms.end(); ++_iter303) + { + xfer += oprot->writeI64((*_iter303)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.definition_level_histograms) { + xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histograms.size())); + std::vector ::const_iterator _iter304; + for (_iter304 = this->definition_level_histograms.begin(); _iter304 != this->definition_level_histograms.end(); ++_iter304) + { + xfer += oprot->writeI64((*_iter304)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t AesGcmV1::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_prefix); + this->__isset.aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += 
iprot->readBinary(this->aad_file_unique); + this->__isset.aad_file_unique = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->supply_aad_prefix); + this->__isset.supply_aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t AesGcmV1::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("AesGcmV1"); + + if (this->__isset.aad_prefix) { + xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->aad_prefix); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.aad_file_unique) { + xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->aad_file_unique); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.supply_aad_prefix) { + xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->supply_aad_prefix); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t AesGcmCtrV1::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_prefix); + this->__isset.aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_file_unique); + this->__isset.aad_file_unique = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->supply_aad_prefix); + this->__isset.supply_aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t AesGcmCtrV1::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("AesGcmCtrV1"); + + if (this->__isset.aad_prefix) { + xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->aad_prefix); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.aad_file_unique) { + xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->aad_file_unique); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.supply_aad_prefix) { + xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->supply_aad_prefix); + xfer += 
oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t EncryptionAlgorithm::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->AES_GCM_V1.read(iprot); + this->__isset.AES_GCM_V1 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->AES_GCM_CTR_V1.read(iprot); + this->__isset.AES_GCM_CTR_V1 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t EncryptionAlgorithm::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionAlgorithm"); + + if (this->__isset.AES_GCM_V1) { + xfer += oprot->writeFieldBegin("AES_GCM_V1", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->AES_GCM_V1.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.AES_GCM_CTR_V1) { + xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->AES_GCM_CTR_V1.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t FileMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_version = false; + bool isset_schema = false; + bool isset_num_rows = false; + bool isset_row_groups = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->version); + isset_version = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->schema.clear(); + uint32_t _size321; + ::apache::thrift::protocol::TType _etype324; + xfer += iprot->readListBegin(_etype324, _size321); + this->schema.resize(_size321); + uint32_t _i325; + for (_i325 = 0; _i325 < _size321; ++_i325) + { + xfer += this->schema[_i325].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_schema = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->row_groups.clear(); + uint32_t _size326; + ::apache::thrift::protocol::TType _etype329; + xfer += iprot->readListBegin(_etype329, 
_size326); + this->row_groups.resize(_size326); + uint32_t _i330; + for (_i330 = 0; _i330 < _size326; ++_i330) + { + xfer += this->row_groups[_i330].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_row_groups = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->key_value_metadata.clear(); + uint32_t _size331; + ::apache::thrift::protocol::TType _etype334; + xfer += iprot->readListBegin(_etype334, _size331); + this->key_value_metadata.resize(_size331); + uint32_t _i335; + for (_i335 = 0; _i335 < _size331; ++_i335) + { + xfer += this->key_value_metadata[_i335].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.key_value_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->created_by); + this->__isset.created_by = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->column_orders.clear(); + uint32_t _size336; + ::apache::thrift::protocol::TType _etype339; + xfer += iprot->readListBegin(_etype339, _size336); + this->column_orders.resize(_size336); + uint32_t _i340; + for (_i340 = 0; _i340 < _size336; ++_i340) + { + xfer += this->column_orders[_i340].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.column_orders = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->encryption_algorithm.read(iprot); + this->__isset.encryption_algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->footer_signing_key_metadata); + this->__isset.footer_signing_key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_version) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_row_groups) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t FileMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("FileMetaData"); + + xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->version); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); + std::vector ::const_iterator _iter341; + for (_iter341 = this->schema.begin(); _iter341 != this->schema.end(); ++_iter341) + { + xfer += (*_iter341).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->num_rows); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); + { + xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); + std::vector ::const_iterator _iter342; + for (_iter342 = this->row_groups.begin(); _iter342 != this->row_groups.end(); ++_iter342) + { + xfer += (*_iter342).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_value_metadata) { + xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); + std::vector ::const_iterator _iter343; + for (_iter343 = this->key_value_metadata.begin(); _iter343 != this->key_value_metadata.end(); ++_iter343) + { + xfer += (*_iter343).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.created_by) { + xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6); + xfer += oprot->writeString(this->created_by); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_orders) { + xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); + std::vector ::const_iterator _iter344; + for (_iter344 = this->column_orders.begin(); _iter344 != this->column_orders.end(); ++_iter344) + { + xfer += (*_iter344).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encryption_algorithm) { + xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->encryption_algorithm.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.footer_signing_key_metadata) { + xfer += oprot->writeFieldBegin("footer_signing_key_metadata", ::apache::thrift::protocol::T_STRING, 9); + xfer += oprot->writeBinary(this->footer_signing_key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t FileCryptoMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_encryption_algorithm = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->encryption_algorithm.read(iprot); + isset_encryption_algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->key_metadata); + this->__isset.key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_encryption_algorithm) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t FileCryptoMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += 
oprot->writeStructBegin("FileCryptoMetaData"); + + xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->encryption_algorithm.write(oprot); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_metadata) { + xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +}} // namespace + +#endif diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index cc9f1abbeccf7..a0aaeb72c5ebe 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -398,6 +398,7 @@ add_parquet_test(writer-test add_parquet_test(arrow-test SOURCES + arrow/arrow_metadata_test.cc arrow/arrow_reader_writer_test.cc arrow/arrow_schema_test.cc arrow/arrow_statistics_test.cc) @@ -433,6 +434,7 @@ add_parquet_benchmark(column_reader_benchmark) add_parquet_benchmark(column_io_benchmark) add_parquet_benchmark(encoding_benchmark) add_parquet_benchmark(level_conversion_benchmark) +add_parquet_benchmark(metadata_benchmark) add_parquet_benchmark(page_index_benchmark SOURCES page_index_benchmark.cc benchmark_util.cc) add_parquet_benchmark(arrow/reader_writer_benchmark PREFIX "parquet-arrow") diff --git a/cpp/src/parquet/arrow/arrow_metadata_test.cc b/cpp/src/parquet/arrow/arrow_metadata_test.cc new file mode 100644 index 0000000000000..6f512227708b9 --- /dev/null +++ b/cpp/src/parquet/arrow/arrow_metadata_test.cc @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gtest/gtest.h" + +#include "arrow/table.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/key_value_metadata.h" + +#include "parquet/api/writer.h" + +#include "parquet/arrow/reader.h" +#include "parquet/arrow/schema.h" +#include "parquet/arrow/writer.h" +#include "parquet/file_writer.h" +#include "parquet/test_util.h" + +namespace parquet::arrow { + +TEST(Metadata, AppendMetadata) { + // A sample table, type and structure does not matter in this test case + auto schema = ::arrow::schema({::arrow::field("f", ::arrow::utf8())}); + auto table = ::arrow::Table::Make( + schema, {::arrow::ArrayFromJSON(::arrow::utf8(), R"(["a", "b", "c"])")}); + + auto sink = CreateOutputStream(); + ArrowWriterProperties::Builder builder; + builder.store_schema(); + ASSERT_OK_AND_ASSIGN(auto writer, + parquet::arrow::FileWriter::Open( + *schema, ::arrow::default_memory_pool(), sink, + parquet::default_writer_properties(), builder.build())); + + auto kv_meta = std::make_shared(); + kv_meta->Append("test_key_1", "test_value_1"); + // would be overwritten later. 
+ kv_meta->Append("test_key_2", "test_value_2_temp"); + ASSERT_OK(writer->AddKeyValueMetadata(kv_meta)); + + // Key value metadata that will be added to the file. + auto kv_meta_added = std::make_shared<::arrow::KeyValueMetadata>(); + kv_meta_added->Append("test_key_2", "test_value_2"); + kv_meta_added->Append("test_key_3", "test_value_3"); + + ASSERT_OK(writer->AddKeyValueMetadata(kv_meta_added)); + ASSERT_OK(writer->Close()); + + // return error if the file is closed + ASSERT_RAISES(IOError, writer->AddKeyValueMetadata(kv_meta_added)); + + auto verify_key_value_metadata = + [&](const std::shared_ptr& key_value_metadata) { + ASSERT_TRUE(nullptr != key_value_metadata); + + // Verify keys that were added before file writer was closed are present. + for (int i = 1; i <= 3; ++i) { + auto index = std::to_string(i); + PARQUET_ASSIGN_OR_THROW(auto value, + key_value_metadata->Get("test_key_" + index)); + EXPECT_EQ("test_value_" + index, value); + } + EXPECT_TRUE(key_value_metadata->Contains("ARROW:schema")); + }; + // verify the metadata in writer + verify_key_value_metadata(writer->metadata()->key_value_metadata()); + + ASSERT_OK(writer->Close()); + + ASSERT_OK_AND_ASSIGN(auto buffer, sink->Finish()); + // verify the metadata in reader + { + std::unique_ptr reader; + FileReaderBuilder reader_builder; + ASSERT_OK_NO_THROW( + reader_builder.Open(std::make_shared<::arrow::io::BufferReader>(buffer))); + ASSERT_OK( + reader_builder.properties(default_arrow_reader_properties())->Build(&reader)); + + verify_key_value_metadata(reader->parquet_reader()->metadata()->key_value_metadata()); + } +} + +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index eefd823dfb385..f71adb380ba2f 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -4471,7 +4471,7 @@ class TestArrowReadDictionary : public ::testing::TestWithParam { RETURN_NOT_OK(builder.Open(std::make_shared(buffer_))); RETURN_NOT_OK(builder.properties(properties_)->Build(&reader)); - return std::move(reader); + return reader; } }; diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index d6ad7c25bc7c1..285e2a597389d 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -1043,6 +1043,16 @@ Status FileReaderImpl::GetRecordBatchReader(const std::vector& row_groups, } } + // Check all columns has same row-size + if (!columns.empty()) { + int64_t row_size = columns[0]->length(); + for (size_t i = 1; i < columns.size(); ++i) { + if (columns[i]->length() != row_size) { + return ::arrow::Status::Invalid("columns do not have the same size"); + } + } + } + auto table = ::arrow::Table::Make(batch_schema, std::move(columns)); auto table_reader = std::make_shared<::arrow::TableBatchReader>(*table); diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index 5238986c428d3..4fd7ef1b47b39 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -482,6 +482,14 @@ class FileWriterImpl : public FileWriter { return writer_->metadata(); } + /// \brief Append the key-value metadata to the file metadata + ::arrow::Status AddKeyValueMetadata( + const std::shared_ptr& key_value_metadata) + override { + PARQUET_CATCH_NOT_OK(writer_->AddKeyValueMetadata(key_value_metadata)); + return Status::OK(); + } + private: friend class FileWriter; @@ -547,8 +555,8 @@ Status GetSchemaMetadata(const ::arrow::Schema& 
schema, ::arrow::MemoryPool* poo // The serialized schema is not UTF-8, which is required for Thrift std::string schema_as_string = serialized->ToString(); std::string schema_base64 = ::arrow::util::base64_encode(schema_as_string); - result->Append(kArrowSchemaKey, schema_base64); - *out = result; + result->Append(kArrowSchemaKey, std::move(schema_base64)); + *out = std::move(result); return Status::OK(); } diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h index 1decafedc97fd..4a1a033a7b7b8 100644 --- a/cpp/src/parquet/arrow/writer.h +++ b/cpp/src/parquet/arrow/writer.h @@ -143,6 +143,16 @@ class PARQUET_EXPORT FileWriter { virtual ~FileWriter(); virtual MemoryPool* memory_pool() const = 0; + /// \brief Add key-value metadata to the file. + /// \param[in] key_value_metadata the metadata to add. + /// \note This will overwrite any existing metadata with the same key. + /// \return Error if Close() has been called. + /// + /// WARNING: If `store_schema` is enabled, `ARROW:schema` would be stored + /// in the key-value metadata. Overwriting this key would result in + /// `store_schema` being unusable during read. + virtual ::arrow::Status AddKeyValueMetadata( + const std::shared_ptr& key_value_metadata) = 0; /// \brief Return the file metadata, only available after calling Close(). virtual const std::shared_ptr metadata() const = 0; }; diff --git a/cpp/src/parquet/column_page.h b/cpp/src/parquet/column_page.h index 905f805b8c9cc..b389ffd98e6c7 100644 --- a/cpp/src/parquet/column_page.h +++ b/cpp/src/parquet/column_page.h @@ -75,13 +75,13 @@ class DataPage : public Page { protected: DataPage(PageType::type type, const std::shared_ptr& buffer, int32_t num_values, Encoding::type encoding, int64_t uncompressed_size, - const EncodedStatistics& statistics = EncodedStatistics(), + EncodedStatistics statistics = EncodedStatistics(), std::optional first_row_index = std::nullopt) : Page(buffer, type), num_values_(num_values), encoding_(encoding), uncompressed_size_(uncompressed_size), - statistics_(statistics), + statistics_(std::move(statistics)), first_row_index_(std::move(first_row_index)) {} int32_t num_values_; @@ -97,10 +97,10 @@ class DataPageV1 : public DataPage { DataPageV1(const std::shared_ptr& buffer, int32_t num_values, Encoding::type encoding, Encoding::type definition_level_encoding, Encoding::type repetition_level_encoding, int64_t uncompressed_size, - const EncodedStatistics& statistics = EncodedStatistics(), + EncodedStatistics statistics = EncodedStatistics(), std::optional first_row_index = std::nullopt) : DataPage(PageType::DATA_PAGE, buffer, num_values, encoding, uncompressed_size, - statistics, std::move(first_row_index)), + std::move(statistics), std::move(first_row_index)), definition_level_encoding_(definition_level_encoding), repetition_level_encoding_(repetition_level_encoding) {} @@ -119,10 +119,10 @@ class DataPageV2 : public DataPage { int32_t num_rows, Encoding::type encoding, int32_t definition_levels_byte_length, int32_t repetition_levels_byte_length, int64_t uncompressed_size, bool is_compressed = false, - const EncodedStatistics& statistics = EncodedStatistics(), + EncodedStatistics statistics = EncodedStatistics(), std::optional first_row_index = std::nullopt) : DataPage(PageType::DATA_PAGE_V2, buffer, num_values, encoding, uncompressed_size, - statistics, std::move(first_row_index)), + std::move(statistics), std::move(first_row_index)), num_nulls_(num_nulls), num_rows_(num_rows), 
definition_levels_byte_length_(definition_levels_byte_length), diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index eae7ac4252735..407201a89ef08 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -101,6 +101,10 @@ inline void CheckNumberDecoded(int64_t number_decoded, int64_t expected) { std::to_string(expected)); } } + +constexpr std::string_view kErrorRepDefLevelNotMatchesNumValues = + "Number of decoded rep / def levels do not match num_values in page header"; + } // namespace LevelDecoder::LevelDecoder() : num_values_remaining_(0) {} @@ -534,11 +538,11 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() { page_buffer = DecompressIfNeeded(std::move(page_buffer), compressed_len, uncompressed_len); - return std::make_shared<DataPageV1>(page_buffer, header.num_values, - LoadEnumSafe(&header.encoding), - LoadEnumSafe(&header.definition_level_encoding), - LoadEnumSafe(&header.repetition_level_encoding), - uncompressed_len, data_page_statistics); + return std::make_shared<DataPageV1>( + page_buffer, header.num_values, LoadEnumSafe(&header.encoding), + LoadEnumSafe(&header.definition_level_encoding), + LoadEnumSafe(&header.repetition_level_encoding), uncompressed_len, + std::move(data_page_statistics)); } else if (page_type == PageType::DATA_PAGE_V2) { ++page_ordinal_; const format::DataPageHeaderV2& header = current_page_header_.data_page_header_v2; @@ -565,7 +569,7 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() { page_buffer, header.num_values, header.num_nulls, header.num_rows, LoadEnumSafe(&header.encoding), header.definition_levels_byte_length, header.repetition_levels_byte_length, uncompressed_len, is_compressed, - data_page_statistics); + std::move(data_page_statistics)); } else { throw ParquetException( "Internal error, we have already skipped non-data pages in ShouldSkipPage()"); } @@ -907,6 +911,8 @@ class ColumnReaderImplBase { static_cast<int>(data_size)); } + // Available values in the current data page; the count includes repeated + // values and nulls. int64_t available_values_current_page() const { return num_buffered_values_ - num_decoded_values_; } @@ -933,7 +939,7 @@ class ColumnReaderImplBase { int64_t num_buffered_values_; // The number of values from the current data page that have been decoded - // into memory + // into memory or skipped over. int64_t num_decoded_values_; ::arrow::MemoryPool* pool_; @@ -1026,28 +1032,36 @@ class TypedColumnReaderImpl : public TypedColumnReader<DType>, // Read definition and repetition levels. Also return the number of definition levels // and number of values to read. This function is called before reading values. + // + // ReadLevels will throw an exception if the number of levels read does not + // match the number of levels that could be read.
void ReadLevels(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, - int64_t* num_def_levels, int64_t* values_to_read) { - batch_size = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + int64_t* num_def_levels, int64_t* non_null_values_to_read) { + batch_size = std::min(batch_size, this->available_values_current_page()); // If the field is required and non-repeated, there are no definition levels if (this->max_def_level_ > 0 && def_levels != nullptr) { *num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels); + if (ARROW_PREDICT_FALSE(*num_def_levels != batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } // TODO(wesm): this tallying of values-to-decode can be performed with better // cache-efficiency if fused with the level decoding. - *values_to_read += + *non_null_values_to_read += std::count(def_levels, def_levels + *num_def_levels, this->max_def_level_); } else { // Required field, read all values - *values_to_read = batch_size; + if (num_def_levels != nullptr) { + *num_def_levels = 0; + } + *non_null_values_to_read = batch_size; } // Not present for non-repeated fields if (this->max_rep_level_ > 0 && rep_levels != nullptr) { int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels); - if (def_levels != nullptr && *num_def_levels != num_rep_levels) { - throw ParquetException("Number of decoded rep / def levels did not match"); + if (batch_size != num_rep_levels) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } } } @@ -1090,8 +1104,7 @@ int64_t TypedColumnReaderImpl::ReadBatchWithDictionary( *indices_read = ReadDictionaryIndices(indices_to_read, indices); int64_t total_indices = std::max(num_def_levels, *indices_read); // Some callers use a batch size of 0 just to get the dictionary. - int64_t expected_values = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + int64_t expected_values = std::min(batch_size, this->available_values_current_page()); if (total_indices == 0 && expected_values > 0) { std::stringstream ss; ss << "Read 0 values, expected " << expected_values; @@ -1106,7 +1119,8 @@ template int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) { - // HasNext invokes ReadNewPage + // HasNext might invoke ReadNewPage until a data page with + // `available_values_current_page() > 0` is found. if (!HasNext()) { *values_read = 0; return 0; @@ -1115,20 +1129,31 @@ int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def // TODO(wesm): keep reading data pages until batch_size is reached, or the // row group is finished int64_t num_def_levels = 0; - int64_t values_to_read = 0; - ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &values_to_read); - - *values_read = this->ReadValues(values_to_read, values); + // Number of non-null values to read within `num_def_levels`. + int64_t non_null_values_to_read = 0; + ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, + &non_null_values_to_read); + // Should not return more values than available in the current data page, + // since currently, ReadLevels would only consume level from current + // data page. 
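// (Belt-and-braces re-check: ReadLevels() already clamps the batch to the
// values advertised by the page header, so num_def_levels exceeding
// available_values_current_page() can only mean a buggy or hostile level
// decoder. Failing here keeps ConsumeBufferedValues() from pushing
// num_decoded_values_ past the page boundary. Hypothetical example: a page
// declaring num_values = 3 whose RLE level run expands to 5 levels.)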
+ if (ARROW_PREDICT_FALSE(num_def_levels > this->available_values_current_page())) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } + if (non_null_values_to_read != 0) { + *values_read = this->ReadValues(non_null_values_to_read, values); + } else { + *values_read = 0; + } + // Adjust total_values, since if max_def_level_ == 0, num_def_levels would + // be 0 and `values_read` would adjust to `available_values_current_page()`. int64_t total_values = std::max(num_def_levels, *values_read); - int64_t expected_values = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + int64_t expected_values = std::min(batch_size, this->available_values_current_page()); if (total_values == 0 && expected_values > 0) { std::stringstream ss; ss << "Read 0 values, expected " << expected_values; ParquetException::EofException(ss.str()); } this->ConsumeBufferedValues(total_values); - return total_values; } @@ -1137,7 +1162,8 @@ int64_t TypedColumnReaderImpl::ReadBatchSpaced( int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read, int64_t* null_count_out) { - // HasNext invokes ReadNewPage + // HasNext might invoke ReadNewPage until a data page with + // `available_values_current_page() > 0` is found. if (!HasNext()) { *levels_read = 0; *values_read = 0; @@ -1145,21 +1171,24 @@ int64_t TypedColumnReaderImpl::ReadBatchSpaced( return 0; } + // Number of non-null values to read int64_t total_values; // TODO(wesm): keep reading data pages until batch_size is reached, or the // row group is finished - batch_size = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + batch_size = std::min(batch_size, this->available_values_current_page()); // If the field is required and non-repeated, there are no definition levels if (this->max_def_level_ > 0) { int64_t num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels); + if (ARROW_PREDICT_FALSE(num_def_levels != batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } // Not present for non-repeated fields if (this->max_rep_level_ > 0) { int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels); - if (num_def_levels != num_rep_levels) { - throw ParquetException("Number of decoded rep / def levels did not match"); + if (ARROW_PREDICT_FALSE(num_def_levels != num_rep_levels)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } } @@ -1401,26 +1430,21 @@ class TypedRecordReader : public TypedColumnReaderImpl, int16_t* def_levels = this->def_levels() + levels_written_; int16_t* rep_levels = this->rep_levels() + levels_written_; - // Not present for non-repeated fields - int64_t levels_read = 0; + if (ARROW_PREDICT_FALSE(this->ReadDefinitionLevels(batch_size, def_levels) != + batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } if (this->max_rep_level_ > 0) { - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); - if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { - throw ParquetException("Number of decoded rep / def levels did not match"); + int64_t rep_levels_read = this->ReadRepetitionLevels(batch_size, rep_levels); + if (ARROW_PREDICT_FALSE(rep_levels_read != batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } - } else if (this->max_def_level_ > 0) { - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); - } - - // 
Exhausted column chunk - if (levels_read == 0) { - break; } - levels_written_ += levels_read; + levels_written_ += batch_size; records_read += ReadRecordData(num_records - records_read); } else { - // No repetition or definition levels + // No repetition and definition levels, we can read values directly batch_size = std::min(num_records - records_read, batch_size); records_read += ReadRecordData(batch_size); } @@ -1574,13 +1598,14 @@ class TypedRecordReader : public TypedColumnReaderImpl, int16_t* def_levels = this->def_levels() + levels_written_; int16_t* rep_levels = this->rep_levels() + levels_written_; - int64_t levels_read = 0; - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); - if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { - throw ParquetException("Number of decoded rep / def levels did not match"); + if (this->ReadDefinitionLevels(batch_size, def_levels) != batch_size) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } + if (this->ReadRepetitionLevels(batch_size, rep_levels) != batch_size) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } - levels_written_ += levels_read; + levels_written_ += batch_size; int64_t remaining_records = num_records - skipped_records; // This updates at_record_start_. skipped_records += DelimitAndSkipRecordsInBuffer(remaining_records); @@ -1675,44 +1700,55 @@ class TypedRecordReader : public TypedColumnReaderImpl, // // \return Number of records delimited int64_t DelimitRecords(int64_t num_records, int64_t* values_seen) { - int64_t values_to_read = 0; + if (ARROW_PREDICT_FALSE(num_records == 0 || levels_position_ == levels_written_)) { + *values_seen = 0; + return 0; + } int64_t records_read = 0; - - const int16_t* def_levels = this->def_levels() + levels_position_; - const int16_t* rep_levels = this->rep_levels() + levels_position_; - + const int16_t* const rep_levels = this->rep_levels(); + const int16_t* const def_levels = this->def_levels(); ARROW_DCHECK_GT(this->max_rep_level_, 0); - - // Count logical records and number of values to read - while (levels_position_ < levels_written_) { - const int16_t rep_level = *rep_levels++; - if (rep_level == 0) { - // If at_record_start_ is true, we are seeing the start of a record - // for the second time, such as after repeated calls to - // DelimitRecords. In this case we must continue until we find - // another record start or exhausting the ColumnChunk - if (!at_record_start_) { - // We've reached the end of a record; increment the record count. - ++records_read; - if (records_read == num_records) { - // We've found the number of records we were looking for. Set - // at_record_start_ to true and break - at_record_start_ = true; - break; - } - } - } + // If at_record_start_ is true, we are seeing the start of a record + // for the second time, such as after repeated calls to + // DelimitRecords. 
In this case we must continue until we find + // another record start or exhausting the ColumnChunk + int64_t level = levels_position_; + if (at_record_start_) { + ARROW_DCHECK_EQ(0, rep_levels[levels_position_]); + ++levels_position_; // We have decided to consume the level at this position; therefore we // must advance until we find another record boundary at_record_start_ = false; + } - const int16_t def_level = *def_levels++; - if (def_level == this->max_def_level_) { - ++values_to_read; + // Count logical records and number of non-null values to read + ARROW_DCHECK(!at_record_start_); + // Scan repetition levels to find record end + while (levels_position_ < levels_written_) { + // We use an estimated batch size to simplify branching and + // improve performance in the common case. This might slow + // things down a bit if a single long record remains, though. + int64_t stride = + std::min(levels_written_ - levels_position_, num_records - records_read); + const int64_t position_end = levels_position_ + stride; + for (int64_t i = levels_position_; i < position_end; ++i) { + records_read += rep_levels[i] == 0; + } + levels_position_ = position_end; + if (records_read == num_records) { + // Check last rep_level reaches the boundary and + // pop the last level. + ARROW_CHECK_EQ(rep_levels[levels_position_ - 1], 0); + --levels_position_; + // We've found the number of records we were looking for. Set + // at_record_start_ to true and break + at_record_start_ = true; + break; } - ++levels_position_; } - *values_seen = values_to_read; + // Scan definition levels to find number of physical values + *values_seen = std::count(def_levels + level, def_levels + levels_position_, + this->max_def_level_); return records_read; } diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 086f6c0e55806..29e1b2a25e437 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -197,7 +197,7 @@ class PARQUET_EXPORT ColumnReader { template class TypedColumnReader : public ColumnReader { public: - typedef typename DType::c_type T; + using T = typename DType::c_type; // Read a batch of repetition levels, definition levels, and values from the // column. diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index a48573966a905..9096f195687fb 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -415,7 +415,7 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) { &descr, values, /*num_values=*/2, Encoding::PLAIN, /*indices=*/{}, /*indices_size=*/0, /*def_levels=*/input_def_levels, max_def_level_, /*rep_levels=*/{}, - /*max_rep_level=*/0); + /*max_rep_level=*/max_rep_level_); pages_.push_back(data_page); InitReader(&descr); auto reader = static_cast(reader_.get()); @@ -431,6 +431,80 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) { ParquetException); } +// GH-41321: When max_def_level > 0 or max_rep_level > 0, and +// Page has more or less levels than the `num_values` in +// PageHeader. We should detect and throw exception. 
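// The four cases below exercise both directions of the mismatch for both
// level kinds: fewer/more def levels than num_values on an OPTIONAL column,
// and fewer/more rep levels than num_values on a REPEATED column. Each
// variant is expected to surface the shared "Number of decoded rep / def
// levels do not match num_values in page header" error rather than read
// garbage values.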
+TEST_F(TestPrimitiveReader, DefRepLevelNotExpected) { + auto do_check = [&](const NodePtr& type, const std::vector& input_def_levels, + const std::vector& input_rep_levels, int num_values) { + std::vector values(num_values, false); + const ColumnDescriptor descr(type, max_def_level_, max_rep_level_); + + // The data page falls back to plain encoding + std::shared_ptr dummy = AllocateBuffer(); + std::shared_ptr data_page = MakeDataPage( + &descr, values, /*num_values=*/num_values, Encoding::PLAIN, /*indices=*/{}, + /*indices_size=*/0, /*def_levels=*/input_def_levels, max_def_level_, + /*rep_levels=*/input_rep_levels, + /*max_rep_level=*/max_rep_level_); + pages_.push_back(data_page); + InitReader(&descr); + auto reader = static_cast(reader_.get()); + ASSERT_TRUE(reader->HasNext()); + + constexpr int batch_size = 10; + std::vector def_levels(batch_size, 0); + std::vector rep_levels(batch_size, 0); + bool values_out[batch_size]; + int64_t values_read; + EXPECT_THROW_THAT( + [&]() { + reader->ReadBatch(batch_size, def_levels.data(), rep_levels.data(), values_out, + &values_read); + }, + ParquetException, + ::testing::Property(&ParquetException::what, + ::testing::HasSubstr("Number of decoded rep / def levels do " + "not match num_values in page header"))); + }; + // storing def-levels less than value in page-header + { + max_def_level_ = 1; + max_rep_level_ = 0; + NodePtr type = schema::Boolean("a", Repetition::OPTIONAL); + std::vector input_def_levels(1, 1); + std::vector input_rep_levels{}; + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/3); + } + // storing def-levels more than value in page-header + { + max_def_level_ = 1; + max_rep_level_ = 0; + NodePtr type = schema::Boolean("a", Repetition::OPTIONAL); + std::vector input_def_levels(2, 1); + std::vector input_rep_levels{}; + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/1); + } + // storing rep-levels less than value in page-header + { + max_def_level_ = 0; + max_rep_level_ = 1; + NodePtr type = schema::Boolean("a", Repetition::REPEATED); + std::vector input_def_levels{}; + std::vector input_rep_levels(3, 0); + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/4); + } + // storing rep-levels more than value in page-header + { + max_def_level_ = 0; + max_rep_level_ = 1; + NodePtr type = schema::Boolean("a", Repetition::REPEATED); + std::vector input_def_levels{}; + std::vector input_rep_levels(2, 1); + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/1); + } +} + // Repetition level byte length reported in Page but Max Repetition level // is zero for the column. TEST_F(TestPrimitiveReader, TestRepetitionLvlBytesWithMaxRepetitionZero) { diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 6ea542e44acbb..22d1c04f682f9 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -384,6 +384,11 @@ class SerializedPageWriter : public PageWriter { int64_t WriteDataPage(const DataPage& page) override { const int64_t uncompressed_size = page.uncompressed_size(); + if (uncompressed_size > std::numeric_limits::max()) { + throw ParquetException("Uncompressed data page size overflows INT32_MAX. 
Size:", + uncompressed_size); + } + std::shared_ptr compressed_data = page.buffer(); const uint8_t* output_data_buffer = compressed_data->data(); int64_t output_data_len = compressed_data->size(); @@ -404,11 +409,6 @@ class SerializedPageWriter : public PageWriter { } format::PageHeader page_header; - - if (uncompressed_size > std::numeric_limits::max()) { - throw ParquetException("Uncompressed data page size overflows INT32_MAX. Size:", - uncompressed_size); - } page_header.__set_uncompressed_page_size(static_cast(uncompressed_size)); page_header.__set_compressed_page_size(static_cast(output_data_len)); @@ -1027,13 +1027,13 @@ void ColumnWriterImpl::BuildDataPageV1(int64_t definition_levels_rle_size, compressed_data->CopySlice(0, compressed_data->size(), allocator_)); std::unique_ptr page_ptr = std::make_unique( compressed_data_copy, num_values, encoding_, Encoding::RLE, Encoding::RLE, - uncompressed_size, page_stats, first_row_index); + uncompressed_size, std::move(page_stats), first_row_index); total_compressed_bytes_ += page_ptr->size() + sizeof(format::PageHeader); data_pages_.push_back(std::move(page_ptr)); } else { // Eagerly write pages DataPageV1 page(compressed_data, num_values, encoding_, Encoding::RLE, Encoding::RLE, - uncompressed_size, page_stats, first_row_index); + uncompressed_size, std::move(page_stats), first_row_index); WriteDataPage(page); } } diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 3da5c64ace5dd..903faa92b6370 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -551,7 +551,7 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder { int result_size = WriteIndices(buffer->mutable_data(), static_cast(EstimatedDataEncodedSize())); PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false)); - return std::move(buffer); + return buffer; } /// Writes out the encoded dictionary to buffer. 
buffer must be preallocated to @@ -2740,13 +2740,12 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl, : EncoderImpl(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY, pool = ::arrow::default_memory_pool()), sink_(pool), - length_encoder_(nullptr, pool), - encoded_size_{0} {} + length_encoder_(nullptr, pool) {} std::shared_ptr FlushValues() override; int64_t EstimatedDataEncodedSize() override { - return encoded_size_ + length_encoder_.EstimatedDataEncodedSize(); + return sink_.length() + length_encoder_.EstimatedDataEncodedSize(); } using TypedEncoder::Put; @@ -2768,6 +2767,11 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl, return Status::Invalid( "Parquet cannot store strings with size 2GB or more, got: ", view.size()); } + if (ARROW_PREDICT_FALSE( + view.size() + sink_.length() > + static_cast(std::numeric_limits::max()))) { + return Status::Invalid("excess expansion in DELTA_LENGTH_BYTE_ARRAY"); + } length_encoder_.Put({static_cast(view.length())}, 1); PARQUET_THROW_NOT_OK(sink_.Append(view.data(), view.length())); return Status::OK(); @@ -2777,7 +2781,6 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl, ::arrow::BufferBuilder sink_; DeltaBitPackEncoder length_encoder_; - uint32_t encoded_size_; }; template @@ -2803,15 +2806,15 @@ void DeltaLengthByteArrayEncoder::Put(const T* src, int num_values) { const int batch_size = std::min(kBatchSize, num_values - idx); for (int j = 0; j < batch_size; ++j) { const int32_t len = src[idx + j].len; - if (AddWithOverflow(total_increment_size, len, &total_increment_size)) { + if (ARROW_PREDICT_FALSE( + AddWithOverflow(total_increment_size, len, &total_increment_size))) { throw ParquetException("excess expansion in DELTA_LENGTH_BYTE_ARRAY"); } lengths[j] = len; } length_encoder_.Put(lengths.data(), batch_size); } - - if (AddWithOverflow(encoded_size_, total_increment_size, &encoded_size_)) { + if (sink_.length() + total_increment_size > std::numeric_limits::max()) { throw ParquetException("excess expansion in DELTA_LENGTH_BYTE_ARRAY"); } PARQUET_THROW_NOT_OK(sink_.Reserve(total_increment_size)); @@ -2850,7 +2853,6 @@ std::shared_ptr DeltaLengthByteArrayEncoder::FlushValues() { std::shared_ptr buffer; PARQUET_THROW_NOT_OK(sink_.Finish(&buffer, true)); - encoded_size_ = 0; return buffer; } @@ -3694,12 +3696,24 @@ class ByteStreamSplitDecoderBase : public DecoderImpl, ByteStreamSplitDecoderBase(const ColumnDescriptor* descr, int byte_width) : DecoderImpl(descr, Encoding::BYTE_STREAM_SPLIT), byte_width_(byte_width) {} - void SetData(int num_values, const uint8_t* data, int len) override { - if (static_cast(num_values) * byte_width_ != len) { - throw ParquetException("Data size (" + std::to_string(len) + - ") does not match number of values in BYTE_STREAM_SPLIT (" + - std::to_string(num_values) + ")"); + void SetData(int num_values, const uint8_t* data, int len) final { + // Check that the data size is consistent with the number of values + // The spec requires that the data size is a multiple of the number of values, + // see: https://github.com/apache/parquet-format/pull/192 . + // GH-41562: passed in `num_values` may include nulls, so we need to check and + // adjust the number of values. 
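// Worked example with hypothetical numbers: an OPTIONAL FLOAT column
// (byte_width_ = 4) whose page declares num_values = 5 with 2 nulls. Only
// the 3 non-null values are byte-stream-split encoded, so len = 12:
//   5 * 4 = 20 >= 12  -> size check passes
//   12 % 4 == 0       -> alignment check passes
//   num_values = 12 / 4 = 3 physical values, as adjusted below.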
+ if (static_cast<int64_t>(num_values) * byte_width_ < len) { + throw ParquetException( + "Data size (" + std::to_string(len) + + ") is too small for the number of values in BYTE_STREAM_SPLIT (" + + std::to_string(num_values) + ")"); + } + if (len % byte_width_ != 0) { + throw ParquetException("ByteStreamSplit data size " + std::to_string(len) + + " not aligned with type " + TypeToString(DType::type_num) + + " and byte_width: " + std::to_string(byte_width_)); } + num_values = len / byte_width_; DecoderImpl::SetData(num_values, data, len); stride_ = num_values_; } diff --git a/cpp/src/parquet/encoding.h b/cpp/src/parquet/encoding.h index 602009189595e..493c4044ddc1c 100644 --- a/cpp/src/parquet/encoding.h +++ b/cpp/src/parquet/encoding.h @@ -255,6 +255,11 @@ class Decoder { // Sets the data for a new page. This will be called multiple times on the same // decoder and should reset all internal state. + // + // `num_values` comes from the data page header, and may be greater than the number of + // physical values in the data buffer if there are some omitted (null) values. + // `len`, on the other hand, is the size in bytes of the data buffer and + // directly relates to the number of physical values. virtual void SetData(int num_values, const uint8_t* data, int len) = 0; // Returns the number of values left (for the last call to SetData()). This is diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc index b91fcb0839cba..78bf26587e3fb 100644 --- a/cpp/src/parquet/encoding_test.cc +++ b/cpp/src/parquet/encoding_test.cc @@ -577,6 +577,11 @@ TEST(PlainEncodingAdHoc, ArrowBinaryDirectPut) { auto decoder = MakeTypedDecoder<ByteArrayType>(Encoding::PLAIN); ASSERT_NO_THROW(encoder->Put(*values)); + // For Plain encoding, the estimated size should be at least the total byte size + auto& string_array = dynamic_cast<const ::arrow::StringArray&>(*values); + EXPECT_GE(encoder->EstimatedDataEncodedSize(), string_array.total_values_length()) + << "Estimated size should be at least the total byte size"; + auto buf = encoder->FlushValues(); int num_values = static_cast<int>(values->length() - values->null_count()); @@ -1383,7 +1388,7 @@ class TestByteStreamSplitEncoding : public TestEncodingBase<Type> { encoder->PutSpaced(draws_, num_values_, valid_bits, valid_bits_offset); encode_buffer_ = encoder->FlushValues(); ASSERT_EQ(encode_buffer_->size(), physical_byte_width() * (num_values_ - null_count)); - decoder->SetData(num_values_ - null_count, encode_buffer_->data(), + decoder->SetData(num_values_, encode_buffer_->data(), static_cast<int>(encode_buffer_->size())); auto values_decoded = decoder->DecodeSpaced(decode_buf_, num_values_, null_count, valid_bits, valid_bits_offset); @@ -1717,7 +1722,7 @@ class TestDeltaBitPackEncoding : public TestEncodingBase<Type> { for (size_t i = 0; i < kNumRoundTrips; ++i) { encoder->PutSpaced(draws_, num_values_, valid_bits, valid_bits_offset); encode_buffer_ = encoder->FlushValues(); - decoder->SetData(num_values_ - null_count, encode_buffer_->data(), + decoder->SetData(num_values_, encode_buffer_->data(), static_cast<int>(encode_buffer_->size())); auto values_decoded = decoder->DecodeSpaced(decode_buf_, num_values_, null_count, valid_bits, valid_bits_offset); @@ -2160,6 +2165,10 @@ TEST(DeltaLengthByteArrayEncodingAdHoc, ArrowBinaryDirectPut) { auto CheckSeed = [&](std::shared_ptr<::arrow::Array> values) { ASSERT_NO_THROW(encoder->Put(*values)); + auto* binary_array = checked_cast<const ::arrow::BinaryArray*>(values.get()); + // For DeltaLength encoding, the estimated size should be at least the total byte size +
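// (This mirrors the PLAIN assertion above: with the removed encoded_size_
// counter gone, the estimate now derives from sink_.length(), which by
// construction already holds every appended byte, hence the lower bound
// checked next.)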
EXPECT_GE(encoder->EstimatedDataEncodedSize(), binary_array->total_values_length()) + << "Estimated size should be at least the total byte size"; auto buf = encoder->FlushValues(); int num_values = static_cast(values->length() - values->null_count()); diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index 02bea127fd1c3..d88aa6c52ac12 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -124,7 +124,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma data_key = internal::DecryptKeyLocally(encoded_wrapped_dek, kek_bytes, aad); } - return KeyWithMasterId(std::move(data_key), std::move(master_key_id)); + return KeyWithMasterId(std::move(data_key), master_key_id); } std::shared_ptr FileKeyUnwrapper::GetKmsClientFromConfigOrKeyMaterial( diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index b3dd1d6054ac8..8fcb0870ce4b6 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -215,16 +215,14 @@ class SerializedRowGroup : public RowGroupReader::Contents { std::shared_ptr<::arrow::io::internal::ReadRangeCache> cached_source, int64_t source_size, FileMetaData* file_metadata, int row_group_number, ReaderProperties props, - std::shared_ptr prebuffered_column_chunks_bitmap, - std::shared_ptr file_decryptor = nullptr) + std::shared_ptr prebuffered_column_chunks_bitmap) : source_(std::move(source)), cached_source_(std::move(cached_source)), source_size_(source_size), file_metadata_(file_metadata), properties_(std::move(props)), row_group_ordinal_(row_group_number), - prebuffered_column_chunks_bitmap_(std::move(prebuffered_column_chunks_bitmap)), - file_decryptor_(std::move(file_decryptor)) { + prebuffered_column_chunks_bitmap_(std::move(prebuffered_column_chunks_bitmap)) { row_group_metadata_ = file_metadata->RowGroup(row_group_number); } @@ -263,10 +261,10 @@ class SerializedRowGroup : public RowGroupReader::Contents { } // The column is encrypted - std::shared_ptr meta_decryptor = - GetColumnMetaDecryptor(crypto_metadata.get(), file_decryptor_.get()); - std::shared_ptr data_decryptor = - GetColumnDataDecryptor(crypto_metadata.get(), file_decryptor_.get()); + std::shared_ptr meta_decryptor = GetColumnMetaDecryptor( + crypto_metadata.get(), file_metadata_->file_decryptor().get()); + std::shared_ptr data_decryptor = GetColumnDataDecryptor( + crypto_metadata.get(), file_metadata_->file_decryptor().get()); ARROW_DCHECK_NE(meta_decryptor, nullptr); ARROW_DCHECK_NE(data_decryptor, nullptr); @@ -291,7 +289,6 @@ class SerializedRowGroup : public RowGroupReader::Contents { ReaderProperties properties_; int row_group_ordinal_; const std::shared_ptr prebuffered_column_chunks_bitmap_; - std::shared_ptr file_decryptor_; }; // ---------------------------------------------------------------------- @@ -316,7 +313,9 @@ class SerializedFile : public ParquetFileReader::Contents { } void Close() override { - if (file_decryptor_) file_decryptor_->WipeOutDecryptionKeys(); + if (file_metadata_ && file_metadata_->file_decryptor()) { + file_metadata_->file_decryptor()->WipeOutDecryptionKeys(); + } } std::shared_ptr GetRowGroup(int i) override { @@ -330,7 +329,7 @@ class SerializedFile : public ParquetFileReader::Contents { std::unique_ptr contents = std::make_unique( source_, cached_source_, source_size_, file_metadata_.get(), i, properties_, - std::move(prebuffered_column_chunks_bitmap), file_decryptor_); + 
std::move(prebuffered_column_chunks_bitmap)); return std::make_shared(std::move(contents)); } @@ -346,8 +345,9 @@ class SerializedFile : public ParquetFileReader::Contents { "forget to call ParquetFileReader::Open() first?"); } if (!page_index_reader_) { - page_index_reader_ = PageIndexReader::Make(source_.get(), file_metadata_, - properties_, file_decryptor_.get()); + page_index_reader_ = + PageIndexReader::Make(source_.get(), file_metadata_, properties_, + file_metadata_->file_decryptor().get()); } return page_index_reader_; } @@ -362,8 +362,8 @@ class SerializedFile : public ParquetFileReader::Contents { "forget to call ParquetFileReader::Open() first?"); } if (!bloom_filter_reader_) { - bloom_filter_reader_ = - BloomFilterReader::Make(source_, file_metadata_, properties_, file_decryptor_); + bloom_filter_reader_ = BloomFilterReader::Make(source_, file_metadata_, properties_, + file_metadata_->file_decryptor()); if (bloom_filter_reader_ == nullptr) { throw ParquetException("Cannot create BloomFilterReader"); } @@ -441,10 +441,12 @@ class SerializedFile : public ParquetFileReader::Contents { // Parse the footer depending on encryption type const bool is_encrypted_footer = memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0; + std::shared_ptr file_decryptor; if (is_encrypted_footer) { // Encrypted file with Encrypted footer. const std::pair read_size = - ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len); + ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len, + &file_decryptor); // Read the actual footer metadata_start = read_size.first; metadata_len = read_size.second; @@ -453,8 +455,8 @@ class SerializedFile : public ParquetFileReader::Contents { // Fall through } - const uint32_t read_metadata_len = - ParseUnencryptedFileMetadata(metadata_buffer, metadata_len); + const uint32_t read_metadata_len = ParseUnencryptedFileMetadata( + metadata_buffer, metadata_len, std::move(file_decryptor)); auto file_decryption_properties = properties_.file_decryption_properties().get(); if (is_encrypted_footer) { // Nothing else to do here. @@ -550,34 +552,37 @@ class SerializedFile : public ParquetFileReader::Contents { // Parse the footer depending on encryption type const bool is_encrypted_footer = memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0; + std::shared_ptr file_decryptor; if (is_encrypted_footer) { // Encrypted file with Encrypted footer. 
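// (Async twin of the synchronous footer parse above: the freshly created
// decryptor is threaded through the continuation by value rather than
// stashed on SerializedFile, so the callback carries no mutable reader
// state.)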
std::pair read_size; BEGIN_PARQUET_CATCH_EXCEPTIONS - read_size = - ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len); + read_size = ParseMetaDataOfEncryptedFileWithEncryptedFooter( + metadata_buffer, metadata_len, &file_decryptor); END_PARQUET_CATCH_EXCEPTIONS // Read the actual footer int64_t metadata_start = read_size.first; metadata_len = read_size.second; return source_->ReadAsync(metadata_start, metadata_len) - .Then([this, metadata_len, is_encrypted_footer]( + .Then([this, metadata_len, is_encrypted_footer, file_decryptor]( const std::shared_ptr<::arrow::Buffer>& metadata_buffer) { // Continue and read the file footer - return ParseMetaDataFinal(metadata_buffer, metadata_len, is_encrypted_footer); + return ParseMetaDataFinal(metadata_buffer, metadata_len, is_encrypted_footer, + file_decryptor); }); } return ParseMetaDataFinal(std::move(metadata_buffer), metadata_len, - is_encrypted_footer); + is_encrypted_footer, std::move(file_decryptor)); } // Continuation - ::arrow::Status ParseMetaDataFinal(std::shared_ptr<::arrow::Buffer> metadata_buffer, - uint32_t metadata_len, - const bool is_encrypted_footer) { + ::arrow::Status ParseMetaDataFinal( + std::shared_ptr<::arrow::Buffer> metadata_buffer, uint32_t metadata_len, + const bool is_encrypted_footer, + std::shared_ptr file_decryptor) { BEGIN_PARQUET_CATCH_EXCEPTIONS - const uint32_t read_metadata_len = - ParseUnencryptedFileMetadata(metadata_buffer, metadata_len); + const uint32_t read_metadata_len = ParseUnencryptedFileMetadata( + metadata_buffer, metadata_len, std::move(file_decryptor)); auto file_decryption_properties = properties_.file_decryption_properties().get(); if (is_encrypted_footer) { // Nothing else to do here. @@ -608,11 +613,11 @@ class SerializedFile : public ParquetFileReader::Contents { // Maps row group ordinal and prebuffer status of its column chunks in the form of a // bitmap buffer. 
std::unordered_map> prebuffered_column_chunks_; - std::shared_ptr file_decryptor_; // \return The true length of the metadata in bytes - uint32_t ParseUnencryptedFileMetadata(const std::shared_ptr& footer_buffer, - const uint32_t metadata_len); + uint32_t ParseUnencryptedFileMetadata( + const std::shared_ptr& footer_buffer, const uint32_t metadata_len, + std::shared_ptr file_decryptor); std::string HandleAadPrefix(FileDecryptionProperties* file_decryption_properties, EncryptionAlgorithm& algo); @@ -624,11 +629,13 @@ class SerializedFile : public ParquetFileReader::Contents { // \return The position and size of the actual footer std::pair ParseMetaDataOfEncryptedFileWithEncryptedFooter( - const std::shared_ptr& crypto_metadata_buffer, uint32_t footer_len); + const std::shared_ptr& crypto_metadata_buffer, uint32_t footer_len, + std::shared_ptr* file_decryptor); }; uint32_t SerializedFile::ParseUnencryptedFileMetadata( - const std::shared_ptr& metadata_buffer, const uint32_t metadata_len) { + const std::shared_ptr& metadata_buffer, const uint32_t metadata_len, + std::shared_ptr file_decryptor) { if (metadata_buffer->size() != metadata_len) { throw ParquetException("Failed reading metadata buffer (requested " + std::to_string(metadata_len) + " bytes but got " + @@ -637,7 +644,7 @@ uint32_t SerializedFile::ParseUnencryptedFileMetadata( uint32_t read_metadata_len = metadata_len; // The encrypted read path falls through to here, so pass in the decryptor file_metadata_ = FileMetaData::Make(metadata_buffer->data(), &read_metadata_len, - properties_, file_decryptor_); + properties_, std::move(file_decryptor)); return read_metadata_len; } @@ -645,7 +652,7 @@ std::pair SerializedFile::ParseMetaDataOfEncryptedFileWithEncryptedFooter( const std::shared_ptr<::arrow::Buffer>& crypto_metadata_buffer, // both metadata & crypto metadata length - const uint32_t footer_len) { + const uint32_t footer_len, std::shared_ptr* file_decryptor) { // encryption with encrypted footer // Check if the footer_buffer contains the entire metadata if (crypto_metadata_buffer->size() != footer_len) { @@ -664,7 +671,7 @@ SerializedFile::ParseMetaDataOfEncryptedFileWithEncryptedFooter( // Handle AAD prefix EncryptionAlgorithm algo = file_crypto_metadata->encryption_algorithm(); std::string file_aad = HandleAadPrefix(file_decryption_properties, algo); - file_decryptor_ = std::make_shared( + *file_decryptor = std::make_shared( file_decryption_properties, file_aad, algo.algorithm, file_crypto_metadata->key_metadata(), properties_.memory_pool()); @@ -683,12 +690,12 @@ void SerializedFile::ParseMetaDataOfEncryptedFileWithPlaintextFooter( EncryptionAlgorithm algo = file_metadata_->encryption_algorithm(); // Handle AAD prefix std::string file_aad = HandleAadPrefix(file_decryption_properties, algo); - file_decryptor_ = std::make_shared( + auto file_decryptor = std::make_shared( file_decryption_properties, file_aad, algo.algorithm, file_metadata_->footer_signing_key_metadata(), properties_.memory_pool()); // set the InternalFileDecryptor in the metadata as well, as it's used // for signature verification and for ColumnChunkMetaData creation. 
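// (Design note: with FileMetaData owning the InternalFileDecryptor, a
// metadata object that is retained beyond its ParquetFileReader presumably
// still carries what it needs to decrypt column chunk metadata, and
// SerializedRowGroup can drop its duplicate shared_ptr.)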
- file_metadata_->set_file_decryptor(file_decryptor_); + file_metadata_->set_file_decryptor(std::move(file_decryptor)); if (file_decryption_properties->check_plaintext_footer_integrity()) { if (metadata_len - read_metadata_len != diff --git a/cpp/src/parquet/file_writer.h b/cpp/src/parquet/file_writer.h index 31706af86dbde..d5ea1d7c98a0e 100644 --- a/cpp/src/parquet/file_writer.h +++ b/cpp/src/parquet/file_writer.h @@ -202,7 +202,7 @@ class PARQUET_EXPORT ParquetFileWriter { /// \brief Add key-value metadata to the file. /// \param[in] key_value_metadata the metadata to add. - /// \note This will overwrite any existing metadata with the same key. + /// \note This will overwrite any existing metadata with the same key(s). /// \throw ParquetException if Close() has been called. void AddKeyValueMetadata( const std::shared_ptr& key_value_metadata); diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index e6cc9378b9e8f..a735b2bd116f9 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -826,6 +826,10 @@ class FileMetaData::FileMetaDataImpl { file_decryptor_ = std::move(file_decryptor); } + const std::shared_ptr& file_decryptor() const { + return file_decryptor_; + } + private: friend FileMetaDataBuilder; uint32_t metadata_len_ = 0; @@ -947,6 +951,10 @@ void FileMetaData::set_file_decryptor( impl_->set_file_decryptor(std::move(file_decryptor)); } +const std::shared_ptr& FileMetaData::file_decryptor() const { + return impl_->file_decryptor(); +} + ParquetVersion::type FileMetaData::version() const { switch (impl_->version()) { case 1: diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h index 1a79affe42be0..391346aca4466 100644 --- a/cpp/src/parquet/metadata.h +++ b/cpp/src/parquet/metadata.h @@ -398,12 +398,14 @@ class PARQUET_EXPORT FileMetaData { private: friend FileMetaDataBuilder; friend class SerializedFile; + friend class SerializedRowGroup; explicit FileMetaData(const void* serialized_metadata, uint32_t* metadata_len, const ReaderProperties& properties, std::shared_ptr file_decryptor = NULLPTR); void set_file_decryptor(std::shared_ptr file_decryptor); + const std::shared_ptr& file_decryptor() const; // PIMPL Idiom FileMetaData(); diff --git a/cpp/src/parquet/metadata_benchmark.cc b/cpp/src/parquet/metadata_benchmark.cc new file mode 100644 index 0000000000000..97a99be798cbb --- /dev/null +++ b/cpp/src/parquet/metadata_benchmark.cc @@ -0,0 +1,156 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#include <memory>
+#include <sstream>
+
+#include <benchmark/benchmark.h>
+
+#include "arrow/buffer.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/logging.h"
+
+#include "parquet/column_writer.h"
+#include "parquet/file_reader.h"
+#include "parquet/file_writer.h"
+#include "parquet/metadata.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+
+namespace parquet {
+
+using ::arrow::Buffer;
+using ::arrow::io::BufferOutputStream;
+using ::arrow::io::BufferReader;
+using schema::GroupNode;
+using schema::NodePtr;
+using schema::NodeVector;
+
+class MetadataBenchmark {
+ public:
+  explicit MetadataBenchmark(benchmark::State* state)
+      : MetadataBenchmark(static_cast<int>(state->range(0)),
+                          static_cast<int>(state->range(1))) {}
+
+  MetadataBenchmark(int num_columns, int num_row_groups)
+      : num_columns_(num_columns), num_row_groups_(num_row_groups) {
+    NodeVector fields;
+    for (int i = 0; i < num_columns_; ++i) {
+      std::stringstream ss;
+      ss << "col" << i;
+      fields.push_back(parquet::schema::Int32(ss.str(), Repetition::REQUIRED));
+    }
+    schema_root_ = std::static_pointer_cast<GroupNode>(
+        GroupNode::Make("schema", Repetition::REQUIRED, fields));
+
+    WriterProperties::Builder prop_builder;
+    writer_properties_ = prop_builder.version(ParquetVersion::PARQUET_2_6)
+                             ->disable_dictionary()
+                             ->data_page_version(ParquetDataPageVersion::V2)
+                             ->build();
+  }
+
+  std::shared_ptr<Buffer> WriteFile(benchmark::State* state) {
+    PARQUET_ASSIGN_OR_THROW(auto sink, BufferOutputStream::Create());
+
+    auto writer = ParquetFileWriter::Open(sink, schema_root_, writer_properties_);
+    std::vector<int32_t> int32_values(1, 42);
+    int64_t data_size = 0;
+    for (int rg = 0; rg < num_row_groups_; ++rg) {
+      auto row_group_writer = writer->AppendRowGroup();
+      for (int col = 0; col < num_columns_; ++col) {
+        auto col_writer = row_group_writer->NextColumn();
+        ARROW_CHECK_EQ(col_writer->type(), Type::INT32);
+        auto typed_col_writer = static_cast<Int32Writer*>(col_writer);
+        typed_col_writer->WriteBatch(
+            /*num_values=*/static_cast<int64_t>(int32_values.size()),
+            /*def_levels=*/nullptr, /*rep_levels=*/nullptr, int32_values.data());
+        typed_col_writer->Close();
+      }
+      row_group_writer->Close();
+      data_size += row_group_writer->total_compressed_bytes_written();
+    }
+    writer->Close();
+    PARQUET_ASSIGN_OR_THROW(auto buf, sink->Finish());
+    state->counters["file_size"] = static_cast<double>(buf->size());
+    // Note that "data_size" includes the Thrift page headers
+    state->counters["data_size"] = static_cast<double>(data_size);
+    return buf;
+  }
+
+  void ReadFile(std::shared_ptr<Buffer> contents) {
+    auto source = std::make_shared<BufferReader>(contents);
+    ReaderProperties props;
+    auto reader = ParquetFileReader::Open(source, props);
+    auto metadata = reader->metadata();
+    ARROW_CHECK_EQ(metadata->num_columns(), num_columns_);
+    ARROW_CHECK_EQ(metadata->num_row_groups(), num_row_groups_);
+    // There should be one row per row group
+    ARROW_CHECK_EQ(metadata->num_rows(), num_row_groups_);
+    reader->Close();
+  }
+
+ private:
+  int num_columns_;
+  int num_row_groups_;
+  std::shared_ptr<GroupNode> schema_root_;
+  std::shared_ptr<WriterProperties> writer_properties_;
+};
+
+void WriteMetadataSetArgs(benchmark::internal::Benchmark* bench) {
+  bench->ArgNames({"num_columns", "num_row_groups"});
+
+  for (int num_columns : {1, 10, 100}) {
+    for (int num_row_groups : {1, 100, 1000}) {
+      bench->Args({num_columns, num_row_groups});
+    }
+  }
+  /* For larger num_columns, restrict num_row_groups to small values
+   * to avoid blowing up benchmark execution time.
+   */
+  for (int num_row_groups : {1, 100}) {
+    bench->Args({/*num_columns=*/1000, num_row_groups});
+  }
+}
+
+void ReadMetadataSetArgs(benchmark::internal::Benchmark* bench) {
+  WriteMetadataSetArgs(bench);
+}
+
+void WriteFileMetadataAndData(benchmark::State& state) {
+  MetadataBenchmark benchmark(&state);
+
+  for (auto _ : state) {
+    auto sink = benchmark.WriteFile(&state);
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+void ReadFileMetadata(benchmark::State& state) {
+  MetadataBenchmark benchmark(&state);
+  auto contents = benchmark.WriteFile(&state);
+
+  for (auto _ : state) {
+    benchmark.ReadFile(contents);
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+BENCHMARK(WriteFileMetadataAndData)->Apply(WriteMetadataSetArgs);
+BENCHMARK(ReadFileMetadata)->Apply(ReadMetadataSetArgs);
+
+}  // namespace parquet
diff --git a/cpp/src/parquet/platform.cc b/cpp/src/parquet/platform.cc
index 5c355c28be1c3..98946029fb866 100644
--- a/cpp/src/parquet/platform.cc
+++ b/cpp/src/parquet/platform.cc
@@ -35,7 +35,7 @@ std::shared_ptr<::arrow::io::BufferOutputStream> CreateOutputStream(MemoryPool*

 std::shared_ptr<ResizableBuffer> AllocateBuffer(MemoryPool* pool, int64_t size) {
   PARQUET_ASSIGN_OR_THROW(auto result, ::arrow::AllocateResizableBuffer(size, pool));
-  return std::move(result);
+  return result;
 }

 }  // namespace parquet
diff --git a/cpp/src/parquet/properties.cc b/cpp/src/parquet/properties.cc
index 2267efdf8a44a..4e6c558e064d4 100644
--- a/cpp/src/parquet/properties.cc
+++ b/cpp/src/parquet/properties.cc
@@ -38,7 +38,7 @@ std::shared_ptr<ArrowInputStream> ReaderProperties::GetStream(
     PARQUET_ASSIGN_OR_THROW(
         auto stream, ::arrow::io::BufferedInputStream::Create(buffer_size_, pool_,
                                                               safe_stream, num_bytes));
-    return std::move(stream);
+    return stream;
   } else {
     PARQUET_ASSIGN_OR_THROW(auto data, source->ReadAt(start, num_bytes));
diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h
index 7491f118d32a0..9e02331b44ba0 100644
--- a/cpp/src/parquet/thrift_internal.h
+++ b/cpp/src/parquet/thrift_internal.h
@@ -446,13 +446,12 @@ class ThriftDeserializer {
                           T* deserialized_msg) {
     // Deserialize msg bytes into c++ thrift msg using memory transport.
     auto tmem_transport = CreateReadOnlyMemoryBuffer(const_cast<uint8_t*>(buf), *len);
-    apache::thrift::protocol::TCompactProtocolFactoryT<ThriftBuffer> tproto_factory;
-    // Protect against CPU and memory bombs
-    tproto_factory.setStringSizeLimit(string_size_limit_);
-    tproto_factory.setContainerSizeLimit(container_size_limit_);
-    auto tproto = tproto_factory.getProtocol(tmem_transport);
+    auto tproto = apache::thrift::protocol::TCompactProtocolT<ThriftBuffer>(
+        tmem_transport, string_size_limit_, container_size_limit_);
     try {
-      deserialized_msg->read(tproto.get());
+      deserialized_msg
+          ->template read<apache::thrift::protocol::TCompactProtocolT<ThriftBuffer>>(
+              &tproto);
     } catch (std::exception& e) {
       std::stringstream ss;
       ss << "Couldn't deserialize thrift: " << e.what() << "\n";
diff --git a/cpp/src/skyhook/cls/cls_skyhook.cc b/cpp/src/skyhook/cls/cls_skyhook.cc
index e021cb3c8248a..632b82f1d1a6c 100644
--- a/cpp/src/skyhook/cls/cls_skyhook.cc
+++ b/cpp/src/skyhook/cls/cls_skyhook.cc
@@ -95,7 +95,7 @@ class RandomAccessObject : public arrow::io::RandomAccessFile {
   arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override {
     ARROW_ASSIGN_OR_RAISE(auto buffer, ReadAt(pos_, nbytes));
     pos_ += buffer->size();
-    return std::move(buffer);
+    return buffer;
   }

   /// Read a specified number of bytes from the current position into an output stream.
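The three `return std::move(...)` -> `return ...` hunks above (platform.cc, properties.cc, cls_skyhook.cc) all apply the same C++ return-value rule. A minimal standalone sketch of that rule, for reference (illustrative code, not from the Arrow tree):

```cpp
#include <memory>

struct Base {};
struct Derived : Base {};

// Returning a named local whose type differs from the return type:
// since CWG defect 1579 (applied retroactively to C++11), the compiler
// first tries to treat the local as an rvalue, so the shared_ptr<Base>
// is move-constructed automatically; an explicit std::move() is redundant.
std::shared_ptr<Base> MakeConverted() {
  auto d = std::make_shared<Derived>();
  return d;  // implicit move
}

// When the local's type matches the return type exactly, `return local;`
// is additionally eligible for copy elision (NRVO); wrapping the operand
// in std::move() would suppress that optimization, which is the
// "pessimizing move" that clang-tidy warns about.
std::shared_ptr<Base> MakeDirect() {
  std::shared_ptr<Base> b = std::make_shared<Derived>();
  return b;  // eligible for NRVO
}

int main() { return (MakeConverted() && MakeDirect()) ? 0 : 1; }
```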
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 4983f3cee2c2d..06506d32bef7c 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -113,8 +113,8 @@ ARROW_UCX_BUILD_VERSION=1.12.1
 ARROW_UCX_BUILD_SHA256_CHECKSUM=9bef31aed0e28bf1973d28d74d9ac4f8926c43ca3b7010bd22a084e164e31b71
 ARROW_UTF8PROC_BUILD_VERSION=v2.7.0
 ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=4bb121e297293c0fd55f08f83afab6d35d48f0af4ecc07523ad8ec99aa2b12a1
-ARROW_XSIMD_BUILD_VERSION=9.0.1
-ARROW_XSIMD_BUILD_SHA256_CHECKSUM=b1bb5f92167fd3a4f25749db0be7e61ed37e0a5d943490f3accdcd2cd2918cc0
+ARROW_XSIMD_BUILD_VERSION=13.0.0
+ARROW_XSIMD_BUILD_SHA256_CHECKSUM=8bdbbad0c3e7afa38d88d0d484d70a1671a1d8aefff03f4223ab2eb6a41110a3
 ARROW_ZLIB_BUILD_VERSION=1.3.1
 ARROW_ZLIB_BUILD_SHA256_CHECKSUM=9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23
 ARROW_ZSTD_BUILD_VERSION=1.5.6
diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props
index f6d42241f95cf..3c06d3cd31d90 100644
--- a/csharp/Directory.Build.props
+++ b/csharp/Directory.Build.props
@@ -37,12 +37,13 @@
     latest
    true
    $(CSharpDir)ApacheArrow.snk
+    true

     The Apache Software Foundation
-    https://www.apache.org/images/feather.png
+    feather.png
     LICENSE.txt
     https://arrow.apache.org/
@@ -55,6 +56,7 @@

+

diff --git a/csharp/examples/FlightAspServerExample/FlightAspServerExample.csproj b/csharp/examples/FlightAspServerExample/FlightAspServerExample.csproj
index 98e8bb324c727..79312520ba8c0 100644
--- a/csharp/examples/FlightAspServerExample/FlightAspServerExample.csproj
+++ b/csharp/examples/FlightAspServerExample/FlightAspServerExample.csproj
@@ -20,7 +20,7 @@

-    <TargetFramework>net7.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>
     enable
     enable
     10
diff --git a/csharp/examples/FlightClientExample/FlightClientExample.csproj b/csharp/examples/FlightClientExample/FlightClientExample.csproj
index 228c5cf4ea44c..ce0ec83359769 100644
--- a/csharp/examples/FlightClientExample/FlightClientExample.csproj
+++ b/csharp/examples/FlightClientExample/FlightClientExample.csproj
@@ -21,7 +21,7 @@

     Exe
-    <TargetFramework>net7.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>

diff --git a/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj b/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj
index a7bd5392f007d..b1d76d4db00e0 100644
--- a/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj
+++ b/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj
@@ -2,7 +2,7 @@

     Exe
-    <TargetFramework>net7.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>

diff --git a/csharp/feather.png b/csharp/feather.png
new file mode 100644
index 0000000000000..7b596e6683ddb
Binary files /dev/null and b/csharp/feather.png differ
diff --git a/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj b/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj
index 0ce8c89bb1d1b..b8f69672cbc7c 100644
--- a/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj
+++ b/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj
@@ -13,7 +13,7 @@

-
+

diff --git a/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj b/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj
index 55497203a12be..2dd1d9d8f98e2 100644
--- a/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj
+++ b/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj
@@ -5,7 +5,7 @@

-
+

diff --git a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj
index 9a3cf190cc376..ee6d42c8d17fc 100644
---
a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj
+++ b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj
@@ -5,7 +5,7 @@

-
+

diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
index 04b8a7dc734f0..21b0df349e2d8 100644
--- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
+++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
@@ -5,9 +5,9 @@

-
-
-
+
+
+

diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
index 1bd4035d5b9da..bd5d9315e9fc4 100644
--- a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
@@ -22,7 +22,7 @@

 namespace Apache.Arrow
 {
-    public class BinaryArray : Array, IReadOnlyList<byte[]>
+    public class BinaryArray : Array, IReadOnlyList<byte[]>, ICollection<byte[]>
     {
         public class Builder : BuilderBase<BinaryArray, Builder>
         {
@@ -380,5 +380,30 @@ IEnumerator<byte[]> IEnumerable<byte[]>.GetEnumerator()
         }

         IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable<byte[]>)this).GetEnumerator();
+
+        int ICollection<byte[]>.Count => Length;
+        bool ICollection<byte[]>.IsReadOnly => true;
+        void ICollection<byte[]>.Add(byte[] item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<byte[]>.Remove(byte[] item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<byte[]>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<byte[]>.Contains(byte[] item)
+        {
+            for (int index = 0; index < Length; index++)
+            {
+                if (GetBytes(index).SequenceEqual(item))
+                    return true;
+            }
+
+            return false;
+        }
+
+        void ICollection<byte[]>.CopyTo(byte[][] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetBytes(srcIndex).ToArray();
+            }
+        }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs b/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs
index e9c5f8979e48f..19d4d0b7ed564 100644
--- a/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs
@@ -21,7 +21,7 @@

 namespace Apache.Arrow
 {
-    public class BooleanArray: Array, IReadOnlyList<bool?>
+    public class BooleanArray: Array, IReadOnlyList<bool?>, ICollection<bool?>
     {
         public class Builder : IArrowArrayBuilder<bool, BooleanArray, Builder>
         {
@@ -188,7 +188,7 @@ public bool GetBoolean(int index)
         public bool? GetValue(int index)
         {
             return IsNull(index)
-                ? (bool?)null
+                ? null
                 : BitUtility.GetBit(ValueBuffer.Span, index + Offset);
         }

@@ -205,5 +205,30 @@ public bool GetBoolean(int index)
         }

         IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable<bool?>)this).GetEnumerator();
+
+        int ICollection<bool?>.Count => Length;
+        bool ICollection<bool?>.IsReadOnly => true;
+        void ICollection<bool?>.Add(bool? item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<bool?>.Remove(bool? item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<bool?>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<bool?>.Contains(bool? item)
+        {
+            for (int index = 0; index < Length; index++)
+            {
+                if (GetValue(index).Equals(item))
+                    return true;
+            }
+
+            return false;
+        }
+
+        void ICollection<bool?>.CopyTo(bool?[] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetValue(srcIndex);
+            }
+        }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/Date32Array.cs b/csharp/src/Apache.Arrow/Arrays/Date32Array.cs
index 6ab4986f573e2..55864e89e2eb3 100644
--- a/csharp/src/Apache.Arrow/Arrays/Date32Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Date32Array.cs
@@ -23,9 +23,9 @@
 /// The class holds an array of dates in the Date32 format, where each date is
 /// stored as the number of days since the dawn of (UNIX) time.
 ///
-    public class Date32Array : PrimitiveArray<int>, IReadOnlyList<DateTime?>
+    public class Date32Array : PrimitiveArray<int>, IReadOnlyList<DateTime?>, ICollection<DateTime?>
 #if NET6_0_OR_GREATER
-        , IReadOnlyList<DateOnly?>
+        , IReadOnlyList<DateOnly?>, ICollection<DateOnly?>
 #endif
     {
         private static readonly DateTime _epochDate = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Unspecified);
@@ -40,10 +40,9 @@ public class Builder : DateArrayBuilder
         {
             private class DateBuilder : PrimitiveArrayBuilder
             {
-                protected override Date32Array Build(
-                    ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
-                    int length, int nullCount, int offset) =>
-                    new Date32Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
+                protected override Date32Array Build(ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, int length,
+                    int nullCount, int offset) =>
+                    new(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
             }

 ///
@@ -149,6 +148,31 @@ public Date32Array(ArrayData data)
                 yield return GetDateOnly(index);
             };
         }
+
+        int ICollection<DateOnly?>.Count => Length;
+        bool ICollection<DateOnly?>.IsReadOnly => true;
+        void ICollection<DateOnly?>.Add(DateOnly? item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<DateOnly?>.Remove(DateOnly? item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<DateOnly?>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<DateOnly?>.Contains(DateOnly? item)
+        {
+            for (int index = 0; index < Length; index++)
+            {
+                if (GetDateOnly(index).Equals(item))
+                    return true;
+            }
+
+            return false;
+        }
+
+        void ICollection<DateOnly?>.CopyTo(DateOnly?[] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetDateOnly(srcIndex);
+            }
+        }
 #endif

         int IReadOnlyCollection<DateTime?>.Count => Length;
@@ -160,7 +184,32 @@ public Date32Array(ArrayData data)
             for (int index = 0; index < Length; index++)
             {
                 yield return GetDateTime(index);
-            };
+            }
+        }
+
+        int ICollection<DateTime?>.Count => Length;
+        bool ICollection<DateTime?>.IsReadOnly => true;
+        void ICollection<DateTime?>.Add(DateTime? item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<DateTime?>.Remove(DateTime? item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<DateTime?>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<DateTime?>.Contains(DateTime? item)
+        {
+            for (int index = 0; index < Length; index++)
+            {
+                if (GetDateTime(index).Equals(item))
+                    return true;
+            }
+
+            return false;
+        }
+
+        void ICollection<DateTime?>.CopyTo(DateTime?[] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetDateTime(srcIndex);
+            }
+        }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/Date64Array.cs b/csharp/src/Apache.Arrow/Arrays/Date64Array.cs
index 43e698e10b25c..77538ce59ffae 100644
--- a/csharp/src/Apache.Arrow/Arrays/Date64Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Date64Array.cs
@@ -24,9 +24,9 @@
 /// stored as the number of milliseconds since the dawn of (UNIX) time, excluding leap seconds, in multiples of
 /// 86400000.
 ///
-    public class Date64Array : PrimitiveArray<long>, IReadOnlyList<DateTime?>
+    public class Date64Array : PrimitiveArray<long>, IReadOnlyList<DateTime?>, ICollection<DateTime?>
 #if NET6_0_OR_GREATER
-        , IReadOnlyList<DateOnly?>
+        , IReadOnlyList<DateOnly?>, ICollection<DateOnly?>
 #endif
     {
         private const long MillisecondsPerDay = 86400000;
@@ -45,10 +44,9 @@ public class Builder : DateArrayBuilder
         {
             private class DateBuilder : PrimitiveArrayBuilder
             {
-                protected override Date64Array Build(
-                    ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
-                    int length, int nullCount, int offset) =>
-                    new Date64Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
+                protected override Date64Array Build(ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, int length,
+                    int nullCount, int offset) =>
+                    new(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
             }

 ///
@@ -151,6 +150,31 @@ public Date64Array(ArrayData data)
                 yield return GetDateOnly(index);
             };
         }
+
+        int ICollection<DateOnly?>.Count => Length;
+        bool ICollection<DateOnly?>.IsReadOnly => true;
+        void ICollection<DateOnly?>.Add(DateOnly? item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<DateOnly?>.Remove(DateOnly? item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<DateOnly?>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<DateOnly?>.Contains(DateOnly? item)
+        {
+            for (int index = 0; index < Length; index++)
+            {
+                if (GetDateOnly(index).Equals(item))
+                    return true;
+            }
+
+            return false;
+        }
+
+        void ICollection<DateOnly?>.CopyTo(DateOnly?[] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetDateOnly(srcIndex);
+            }
+        }
 #endif

         int IReadOnlyCollection<DateTime?>.Count => Length;
@@ -162,7 +186,32 @@ public Date64Array(ArrayData data)
             for (int index = 0; index < Length; index++)
             {
                 yield return GetDateTime(index);
-            };
+            }
+        }
+
+        int ICollection<DateTime?>.Count => Length;
+        bool ICollection<DateTime?>.IsReadOnly => true;
+        void ICollection<DateTime?>.Add(DateTime? item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<DateTime?>.Remove(DateTime? item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<DateTime?>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<DateTime?>.Contains(DateTime? item)
+        {
+            for (int index = 0; index < Length; index++)
+            {
+                if (GetDateTime(index).Equals(item))
+                    return true;
+            }
+
+            return false;
+        }
+
+        void ICollection<DateTime?>.CopyTo(DateTime?[] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetDateTime(srcIndex);
+            }
+        }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs b/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs
index fa6f765475240..52bfb9eb20768 100644
--- a/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs
@@ -13,6 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#nullable enable
+
 using System;
 using System.Collections;
 using System.Collections.Generic;
@@ -23,7 +25,7 @@

 namespace Apache.Arrow
 {
-    public class Decimal256Array : FixedSizeBinaryArray, IReadOnlyList<string>, IReadOnlyList<SqlDecimal?>
+    public class Decimal256Array : FixedSizeBinaryArray, IReadOnlyList<string?>, IReadOnlyList<SqlDecimal?>
     {
         public class Builder : BuilderBase<Decimal256Array, Builder>
         {
@@ -178,7 +180,7 @@ public Decimal256Array(ArrayData data)
             return list;
         }

-        public string GetString(int index)
+        public string? GetString(int index)
         {
             if (IsNull(index))
             {
@@ -230,10 +232,10 @@ public bool TryGetSqlDecimal(int index, out SqlDecimal? value)
             }
         }

-        int IReadOnlyCollection<string>.Count => Length;
-        string? IReadOnlyList<string>.this[int index] => GetString(index);
+        int IReadOnlyCollection<string?>.Count => Length;
+        string? IReadOnlyList<string?>.this[int index] => GetString(index);

-        IEnumerator<string> IEnumerable<string>.GetEnumerator()
+        IEnumerator<string?> IEnumerable<string?>.GetEnumerator()
         {
             for (int index = 0; index < Length; index++)
             {
@@ -241,6 +243,6 @@ IEnumerator IEnumerable.GetEnumerator()
         }

-        IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable<string>)this).GetEnumerator();
+        IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable<string?>)this).GetEnumerator();
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs b/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs
index de4fc42b4cf92..3949af877b0c5 100644
--- a/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs
@@ -31,7 +31,7 @@ internal static class IntervalArray
     }

     public abstract class IntervalArray<T> : PrimitiveArray<T>
-        where T : struct
+        where T : struct, IEquatable<T>
     {
         protected IntervalArray(ArrayData data)
             : base(data)
diff --git a/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs b/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs
index 0456c5cc65ba4..05d659b5270ad 100644
--- a/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs
@@ -20,8 +20,8 @@

 namespace Apache.Arrow
 {
-    public abstract class PrimitiveArray<T> : Array, IReadOnlyList<T?>
-        where T : struct
+    public abstract class PrimitiveArray<T> : Array, IReadOnlyList<T?>, ICollection<T?>
+        where T : struct, IEquatable<T>
     {
         protected PrimitiveArray(ArrayData data)
             : base(data)
@@ -40,7 +40,7 @@ protected PrimitiveArray(ArrayData data)
             {
                 throw new ArgumentOutOfRangeException(nameof(index));
             }
-            return IsValid(index) ? Values[index] : (T?)null;
+            return IsValid(index) ? Values[index] : null;
         }

         public IList<T?> ToList(bool includeNulls = false)
@@ -86,5 +86,36 @@ IEnumerator<T?> IEnumerable<T?>.GetEnumerator()
                 yield return IsValid(index) ? Values[index] : null;
             }
         }
+
+        int ICollection<T?>.Count => Length;
+        bool ICollection<T?>.IsReadOnly => true;
+        void ICollection<T?>.Add(T? item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<T?>.Remove(T? item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<T?>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<T?>.Contains(T? item)
+        {
+            if (item == null)
+            {
+                return NullCount > 0;
+            }
+
+            ReadOnlySpan<T> values = Values;
+            int offset = 0;
+            while (values.Length > 0)
+            {
+                // Search the remaining slice; IsValid needs the absolute index,
+                // so track how far into the full span the slice starts.
+                int index = values.IndexOf(item.Value);
+                if (index < 0) { return false; }
+                if (IsValid(offset + index)) { return true; }
+                values = values.Slice(index + 1);
+                offset += index + 1;
+            }
+            return false;
+        }
+
+        void ICollection<T?>.CopyTo(T?[] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetValue(srcIndex);
+            }
+        }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
index 67fe46633c18f..ae02173fb0df4 100644
--- a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
+++ b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
@@ -20,7 +20,7 @@

 namespace Apache.Arrow
 {
-    public abstract class PrimitiveArrayBuilder : IArrowArrayBuilder
+    public abstract class PrimitiveArrayBuilder : IArrowArrayBuilder
         where TTo : struct
         where TArray : IArrowArray
         where TBuilder : class, IArrowArrayBuilder
diff --git a/csharp/src/Apache.Arrow/Arrays/StringArray.cs b/csharp/src/Apache.Arrow/Arrays/StringArray.cs
index a3ec596adc7ba..ab44805d8d1e9 100644
--- a/csharp/src/Apache.Arrow/Arrays/StringArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/StringArray.cs
@@ -22,7 +22,7 @@

 namespace Apache.Arrow
 {
-    public class StringArray: BinaryArray, IReadOnlyList<string>
+    public class StringArray: BinaryArray, IReadOnlyList<string>, ICollection<string>
     {
         public static readonly Encoding DefaultEncoding = Encoding.UTF8;

@@ -164,5 +164,30 @@ IEnumerator<string> IEnumerable<string>.GetEnumerator()
         }

         IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable<string>)this).GetEnumerator();
+
+        int ICollection<string>.Count => Length;
+        bool ICollection<string>.IsReadOnly => true;
+        void ICollection<string>.Add(string item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<string>.Remove(string item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<string>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<string>.Contains(string item)
+        {
+            for (int index = 0; index < Length; index++)
+            {
+                if (GetString(index) == item)
+                    return true;
+            }
+
+            return false;
+        }
+
+        void ICollection<string>.CopyTo(string[] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetString(srcIndex);
+            }
+        }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/Time32Array.cs b/csharp/src/Apache.Arrow/Arrays/Time32Array.cs
index e9c2d7a4d9b28..63c0898935ba5 100644
--- a/csharp/src/Apache.Arrow/Arrays/Time32Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Time32Array.cs
@@ -26,7 +26,7 @@
 ///
     public class Time32Array : PrimitiveArray<int>
 #if NET6_0_OR_GREATER
-        , IReadOnlyList<TimeOnly?>
+        , IReadOnlyList<TimeOnly?>, ICollection<TimeOnly?>
 #endif
     {
 ///
@@ -171,6 +171,31 @@ public Time32Array(ArrayData data)
                 yield return GetTime(index);
             };
         }
+
+        int ICollection<TimeOnly?>.Count => Length;
+        bool ICollection<TimeOnly?>.IsReadOnly => true;
+        void ICollection<TimeOnly?>.Add(TimeOnly? item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<TimeOnly?>.Remove(TimeOnly? item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<TimeOnly?>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<TimeOnly?>.Contains(TimeOnly? item)
+        {
+            for (int index = 0; index < Length; index++)
+            {
+                if (GetTime(index).Equals(item))
+                    return true;
+            }
+
+            return false;
+        }
+
+        void ICollection<TimeOnly?>.CopyTo(TimeOnly?[] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetTime(srcIndex);
+            }
+        }
 #endif
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/Time64Array.cs b/csharp/src/Apache.Arrow/Arrays/Time64Array.cs
index fc18dfb8bf726..5518462952050 100644
--- a/csharp/src/Apache.Arrow/Arrays/Time64Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Time64Array.cs
@@ -26,7 +26,7 @@
 ///
     public class Time64Array : PrimitiveArray<long>
 #if NET6_0_OR_GREATER
-        , IReadOnlyList<TimeOnly?>
+        , IReadOnlyList<TimeOnly?>, ICollection<TimeOnly?>
 #endif
     {
 ///
@@ -162,6 +162,31 @@ public Time64Array(ArrayData data)
                 yield return GetTime(index);
             };
         }
+
+        int ICollection<TimeOnly?>.Count => Length;
+        bool ICollection<TimeOnly?>.IsReadOnly => true;
+        void ICollection<TimeOnly?>.Add(TimeOnly? item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<TimeOnly?>.Remove(TimeOnly? item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<TimeOnly?>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<TimeOnly?>.Contains(TimeOnly? item)
+        {
+            for (int index = 0; index < Length; index++)
+            {
+                if (GetTime(index).Equals(item))
+                    return true;
+            }
+
+            return false;
+        }
+
+        void ICollection<TimeOnly?>.CopyTo(TimeOnly?[] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetTime(srcIndex);
+            }
+        }
 #endif
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs b/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs
index ccb656854a5df..b83860584707e 100644
--- a/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs
@@ -21,7 +21,7 @@

 namespace Apache.Arrow
 {
-    public class TimestampArray : PrimitiveArray<long>, IReadOnlyList<DateTimeOffset?>
+    public class TimestampArray : PrimitiveArray<long>, IReadOnlyList<DateTimeOffset?>, ICollection<DateTimeOffset?>
     {
         private static readonly DateTimeOffset s_epoch = new DateTimeOffset(1970, 1, 1, 0, 0, 0, 0, TimeSpan.Zero);

@@ -157,5 +157,30 @@ public DateTimeOffset GetTimestampUnchecked(int index)
                 yield return GetTimestamp(index);
             };
         }
+
+        int ICollection<DateTimeOffset?>.Count => Length;
+        bool ICollection<DateTimeOffset?>.IsReadOnly => true;
+        void ICollection<DateTimeOffset?>.Add(DateTimeOffset? item) => throw new NotSupportedException("Collection is read-only.");
+        bool ICollection<DateTimeOffset?>.Remove(DateTimeOffset? item) => throw new NotSupportedException("Collection is read-only.");
+        void ICollection<DateTimeOffset?>.Clear() => throw new NotSupportedException("Collection is read-only.");
+
+        bool ICollection<DateTimeOffset?>.Contains(DateTimeOffset? item)
+        {
+            for (int index = 0; index < Length; index++)
+            {
+                if (GetTimestamp(index).Equals(item))
+                    return true;
+            }
+
+            return false;
+        }
+
+        void ICollection<DateTimeOffset?>.CopyTo(DateTimeOffset?[] array, int arrayIndex)
+        {
+            for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
+            {
+                array[destIndex] = GetTimestamp(srcIndex);
+            }
+        }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index b11479c0d4460..c66569afeba85 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -148,7 +148,7 @@ public void VisitArray(IArrowArray array)
             public void Visit(MonthDayNanosecondIntervalArray array) => VisitPrimitiveArray(array);

             private void VisitPrimitiveArray<T>(PrimitiveArray<T> array)
-                where T : struct
+                where T : struct, IEquatable<T>
             {
                 _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length));
                 _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, array.Offset, array.Length));
diff --git a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs
index 8f0210b28240f..d42ee5279e795 100644
--- a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs
+++ b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs
@@ -40,10 +40,12 @@ internal NativeMemoryManager(INativeAllocationOwner owner, IntPtr ptr, int offse
             _owner = owner;
         }

+#pragma warning disable CA2015 // TODO: is this correct?
         ~NativeMemoryManager()
         {
             Dispose(false);
         }
+#pragma warning restore CA2015

         public override unsafe Span<byte> GetSpan()
         {
diff --git a/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj b/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj
index d44b7488e3b17..f735f01b022d0 100644
--- a/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj
+++ b/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj
@@ -2,7 +2,7 @@

     Exe
-    <TargetFramework>net7.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>

diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
index 5cc0d303e881e..bd97372d1021b 100644
--- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
@@ -1,15 +1,15 @@

-    <TargetFramework>net7.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>
     false

-
-
-
+
+
+

diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
index 5b7c10f35bed0..5a5a92ccd2c7f 100644
--- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
@@ -1,14 +1,14 @@

-    <TargetFramework>net7.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>
     false

-
-
-
+
+
+

diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs b/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs
index 4ad5bde0874a8..e5e64b073f799 100644
--- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs
+++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs
@@ -14,6 +14,7 @@
 // limitations under the License.
 #nullable enable
+
 using System;
 using System.Collections.Generic;
 using System.Collections.ObjectModel;
@@ -65,7 +66,7 @@ public async Task EnsureTheCorrectActionsAreGiven()
         var streamWriter = new MockServerStreamWriter();

         //When
-        await producer.ListActions(streamWriter, new MockServerCallContext()).ConfigureAwait(false);
+        await producer.ListActions(streamWriter, new MockServerCallContext());

         var actions = streamWriter.Messages.ToArray();

         Assert.Equal(FlightSqlUtils.FlightSqlActions, actions);
@@ -115,7 +116,7 @@ public void EnsureTableSchemaIsCorrectWithoutTableSchema(bool includeTableSchema
     [InlineData(typeof(CommandGetImportedKeys), "GetImportedKeysFlightInfo")]
     [InlineData(typeof(CommandGetCrossReference), "GetCrossReferenceFlightInfo")]
     [InlineData(typeof(CommandGetXdbcTypeInfo), "GetXdbcTypeFlightInfo")]
-    public async void EnsureGetFlightInfoIsCorrectlyRoutedForCommand(Type commandType, string expectedResult)
+    public async Task EnsureGetFlightInfoIsCorrectlyRoutedForCommand(Type commandType, string expectedResult)
     {
         //Given
         var command = (IMessage) Activator.CreateInstance(commandType)!;
@@ -131,7 +132,7 @@ public async void EnsureGetFlightInfoIsCorrectlyRoutedForCommand(Type commandTyp


     [Fact]
-    public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupportedAndHasNoDescriptor()
+    public async Task EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupportedAndHasNoDescriptor()
     {
         //Given
         var producer = new TestFlightSqlSever();
@@ -145,7 +146,7 @@ public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupp
     }

     [Fact]
-    public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupported()
+    public async Task EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupported()
    {
         //Given
         var producer = new TestFlightSqlSever();
@@ -175,7 +176,7 @@ public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupp
     [InlineData(typeof(CommandGetImportedKeys), "DoGetImportedKeys")]
     [InlineData(typeof(CommandGetCrossReference), "DoGetCrossReference")]
     [InlineData(typeof(CommandGetXdbcTypeInfo), "DoGetXbdcTypeInfo")]
-    public async void EnsureDoGetIsCorrectlyRoutedForADoGetCommand(Type commandType, string expectedResult)
+    public async Task EnsureDoGetIsCorrectlyRoutedForADoGetCommand(Type commandType, string expectedResult)
     {
         //Given
         var producer = new TestFlightSqlSever();
@@ -192,7 +193,7 @@ public async void EnsureDoGetIsCorrectlyRoutedForADoGetCommand(Type commandType,
     }

     [Fact]
-    public async void EnsureAnInvalidOperationExceptionIsThrownWhenADoGetCommandIsNotSupported()
+    public async Task EnsureAnInvalidOperationExceptionIsThrownWhenADoGetCommandIsNotSupported()
     {
         //Given
         var producer = new TestFlightSqlSever();
@@ -213,7 +214,7 @@ public async void EnsureAnInvalidOperationExceptionIsThrownWhenADoGetCommandIsNo
     [InlineData(SqlAction.CloseRequest, typeof(ActionClosePreparedStatementRequest), "ClosePreparedStatement")]
     [InlineData(SqlAction.CreateRequest, typeof(ActionCreatePreparedStatementRequest), "CreatePreparedStatement")]
     [InlineData("BadCommand", typeof(ActionCreatePreparedStatementRequest), "Action type BadCommand not supported", true)]
-    public async void EnsureDoActionIsCorrectlyRoutedForAnActionRequest(string actionType, Type actionBodyType, string expectedResponse, bool isException = false)
+    public async Task EnsureDoActionIsCorrectlyRoutedForAnActionRequest(string actionType, Type actionBodyType, string expectedResponse, bool isException = false)
     {
         //Given
         var producer = new TestFlightSqlSever();
@@ -237,19 +238,19 @@ public async void EnsureDoActionIsCorrectlyRoutedForAnActionRequest(string actio
     [InlineData(typeof(CommandPreparedStatementQuery), "PutPreparedStatementQuery")]
     [InlineData(typeof(CommandPreparedStatementUpdate), "PutPreparedStatementUpdate")]
     [InlineData(typeof(CommandGetXdbcTypeInfo), "Command CommandGetXdbcTypeInfo not supported", true)]
-    public async void EnsureDoPutIsCorrectlyRoutedForTheCommand(Type commandType, string expectedResponse, bool isException = false)
+    public async Task EnsureDoPutIsCorrectlyRoutedForTheCommand(Type commandType, string expectedResponse, bool isException = false)
     {
         //Given
         var command = (IMessage) Activator.CreateInstance(commandType)!;
         var producer = new TestFlightSqlSever();
         var descriptor = FlightDescriptor.CreateCommandDescriptor(command.PackAndSerialize().ToArray());
         var recordBatch = new RecordBatch(new Schema(new List<Field>(), null), System.Array.Empty<IArrowArray>(), 0);
-        var reader = new MockStreamReader(await recordBatch.ToFlightData(descriptor).ConfigureAwait(false));
+        var reader = new MockStreamReader(await recordBatch.ToFlightData(descriptor));
         var batchReader = new FlightServerRecordBatchStreamReader(reader);
         var mockStreamWriter = new MockServerStreamWriter();

         //When
-        async Task Act() => await producer.DoPut(batchReader, mockStreamWriter, new MockServerCallContext()).ConfigureAwait(false);
+        async Task Act() => await producer.DoPut(batchReader, mockStreamWriter, new MockServerCallContext());
         var exception = await Record.ExceptionAsync(Act);

         string? actualMessage = isException ? exception?.Message : mockStreamWriter.Messages[0].ApplicationMetadata.ToStringUtf8();
@@ -271,7 +272,7 @@ private class MockServerCallContext : ServerCallContext
         protected override CancellationToken CancellationTokenCore => default;
         protected override Metadata ResponseTrailersCore => new();
         protected override Status StatusCore { get; set; }
-        protected override WriteOptions WriteOptionsCore { get; set; } = WriteOptions.Default;
+        protected override WriteOptions? WriteOptionsCore { get; set; } = WriteOptions.Default;
         protected override AuthContext AuthContextCore => new("", new Dictionary<string, List<AuthProperty>>());
     }
 }
@@ -325,7 +326,7 @@ public static async Task<Schema> GetSchema(this IEnumerable<FlightData> flightDa
     public static async Task<List<FlightData>> ToFlightData(this RecordBatch recordBatch, FlightDescriptor? descriptor = null)
     {
         var responseStream = new MockFlightServerRecordBatchStreamWriter();
-        await responseStream.WriteRecordBatchAsync(recordBatch).ConfigureAwait(false);
+        await responseStream.WriteRecordBatchAsync(recordBatch);
         if (descriptor == null)
         {
             return responseStream.FlightData;
diff --git a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj
index d7a2042a4581a..789fb9569edba 100644
--- a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj
+++ b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj
@@ -1,11 +1,11 @@

-    <TargetFramework>net7.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>

-
+

diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
index 050d0f452cc4e..132f17fa212a5 100644
--- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
@@ -1,14 +1,14 @@

-    <TargetFramework>net7.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>
     false

-
-
-
+
+
+

diff --git a/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs b/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs
index ebc38354b5c28..aac4e4209240a 100644
--- a/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs
+++ b/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs
@@ -288,9 +288,9 @@ public async Task TestHandshake()
     {
         var duplexStreamingCall = _flightClient.Handshake();

-        await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.Empty)).ConfigureAwait(false);
-        await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false);
-        var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false);
+        await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.Empty));
+        await duplexStreamingCall.RequestStream.CompleteAsync();
+        var results = await duplexStreamingCall.ResponseStream.ToListAsync();

         Assert.Single(results);
         Assert.Equal("Done", results.First().Payload.ToStringUtf8());
@@ -303,10 +303,10 @@ public async Task TestSingleExchange()
         var duplexStreamingCall = _flightClient.DoExchange(flightDescriptor);
         var expectedBatch = CreateTestBatch(0, 100);

-        await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch).ConfigureAwait(false);
-        await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false);
+        await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch);
+        await duplexStreamingCall.RequestStream.CompleteAsync();

-        var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false);
+        var results = await duplexStreamingCall.ResponseStream.ToListAsync();

         Assert.Single(results);
         ArrowReaderVerifier.CompareBatches(expectedBatch, results.FirstOrDefault());
@@ -320,11 +320,11 @@ public async Task TestMultipleExchange()
         var expectedBatch1 = CreateTestBatch(0, 100);
         var expectedBatch2 = CreateTestBatch(100, 100);

-        await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch1).ConfigureAwait(false);
-        await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch2).ConfigureAwait(false);
-        await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false);
+        await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch1);
+        await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch2);
+        await duplexStreamingCall.RequestStream.CompleteAsync();

-        var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false);
+        var results = await duplexStreamingCall.ResponseStream.ToListAsync();

         ArrowReaderVerifier.CompareBatches(expectedBatch1, results[0]);
         ArrowReaderVerifier.CompareBatches(expectedBatch2, results[1]);
@@ -338,8 +338,8 @@ public async Task TestExchangeWithMetadata()
         var expectedBatch = CreateTestBatch(0, 100);
         var expectedMetadata = ByteString.CopyFromUtf8("test metadata");

-        await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch, expectedMetadata).ConfigureAwait(false);
-        await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false);
+        await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch, expectedMetadata);
+        await duplexStreamingCall.RequestStream.CompleteAsync();

         List<ByteString> actualMetadata = new List<ByteString>();
         List<RecordBatch> actualBatch = new List<RecordBatch>();
@@ -358,9 +358,9 @@ public async Task TestHandshakeWithSpecificMessage()
     {
         var duplexStreamingCall = _flightClient.Handshake();

-        await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.CopyFromUtf8("Hello"))).ConfigureAwait(false);
-        await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false);
-        var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false);
+        await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.CopyFromUtf8("Hello")));
+        await duplexStreamingCall.RequestStream.CompleteAsync();
+        var results = await duplexStreamingCall.ResponseStream.ToListAsync();

         Assert.Single(results);
         Assert.Equal("Hello handshake", results.First().Payload.ToStringUtf8());
diff --git a/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj b/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj
index e77f329bf2a15..7f226fd08818f 100644
--- a/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj
+++ b/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj
@@ -4,7 +4,7 @@

     Exe
     true
-    <TargetFramework>net7.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>

diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
index 31a5676f01315..7232f74b8bec6 100644
--- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
+++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
@@ -908,8 +908,8 @@ private static byte[] ConvertHexStringToByteArray(string hexString)
         };

         private void GenerateArray(Func createArray)
+            where T : struct, IEquatable<T>
             where TArray : PrimitiveArray<T>
-            where T : struct
         {
             ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);
@@ -929,8 +929,8 @@ private void GenerateArray(Func
             (Func createArray, Func parse)
+            where T : struct, IEquatable<T>
             where TArray : PrimitiveArray<T>
-            where T : struct
         {
             ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);
@@ -950,8 +950,8 @@ private void GenerateLongArray(Func
             (Func createArray, Func construct)
+            where T : struct, IEquatable<T>
             where TArray : PrimitiveArray<T>
-            where T : struct
         {
             ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);
diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
index 92f6e2d662f38..a3290e3be14ee 100644
--- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
@@ -4,19 +4,21 @@
     true
     true
+
+    true

-    <TargetFrameworks>net7.0;net472;net462</TargetFrameworks>
+    <TargetFrameworks>net8.0;net472;net462</TargetFrameworks>

-    <TargetFrameworks>net7.0</TargetFrameworks>
+    <TargetFrameworks>net8.0</TargetFrameworks>

-
-
-
+
+
+
       all
       runtime; build; native; contentfiles; analyzers
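Taken together, the array changes above give every concrete Arrow array type a read-only `ICollection<T>` view (`Count`, `Contains`, `CopyTo`; mutators throw), which the tests that follow exercise. As a quick orientation, here is a minimal consumer-side sketch (illustrative only, not part of this patch):

```csharp
using System;
using System.Collections.Generic;
using Apache.Arrow;

class CollectionViewExample
{
    static void Main()
    {
        // Build a three-slot array containing one null, as the tests below do.
        Int64Array array = new Int64Array.Builder().Append(1).AppendNull().Append(2).Build();

        // The array is now usable wherever an ICollection<long?> is expected.
        ICollection<long?> collection = array;

        Console.WriteLine(collection.Count);          // 3
        Console.WriteLine(collection.Contains(2));    // True
        Console.WriteLine(collection.Contains(null)); // True: the array holds a null

        // CopyTo materializes the values (nulls included) into a target array.
        long?[] copy = new long?[collection.Count];
        collection.CopyTo(copy, 0);

        // The view is read-only, so mutators throw NotSupportedException.
        try { collection.Add(3); }
        catch (NotSupportedException e) { Console.WriteLine(e.Message); }
    }
}
```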
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs
index 682ebec323dc0..c3c21c412d20d 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs
@@ -101,9 +101,9 @@ public void EnumerateArray()
     {
         var array = new Int64Array.Builder().Append(1).Append(2).Build();

-        foreach(long? foo in (IEnumerable<long?>)array)
+        foreach(long? foo in array)
         {
-            Assert.InRange(foo.Value, 1, 2);
+            Assert.InRange(foo!.Value, 1, 2);
         }

         foreach (object foo in (IEnumerable)array)
@@ -115,12 +115,145 @@ public void EnumerateArray()
     [Fact]
     public void ArrayAsReadOnlyList()
     {
-        Int64Array array = new Int64Array.Builder().Append(1).Append(2).Build();
-        var readOnlyList = (IReadOnlyList<long?>)array;
+        TestArrayAsReadOnlyList([1, 2]);
+        TestArrayAsReadOnlyList([1, 2]);
+        TestArrayAsReadOnlyList([true, false]);
+        TestArrayAsReadOnlyList([DateTime.MinValue.Date, DateTime.MaxValue.Date]);
+        TestArrayAsReadOnlyList([DateTime.MinValue.Date, DateTime.MaxValue.Date]);
+        TestArrayAsReadOnlyList([DateTimeOffset.MinValue, DateTimeOffset.MinValue.AddYears(100)]);
+
+#if NET5_0_OR_GREATER
+        TestArrayAsReadOnlyList([DateOnly.MinValue, DateOnly.MaxValue]);
+        TestArrayAsReadOnlyList([DateOnly.MinValue, DateOnly.MaxValue]);
+        TestArrayAsReadOnlyList([TimeOnly.MinValue, TimeOnly.MinValue.AddHours(23)]);
+        TestArrayAsReadOnlyList([TimeOnly.MinValue, TimeOnly.MaxValue]);
+        TestArrayAsReadOnlyList([(Half)1.1, (Half)2.2f]);
+#endif
+    }
+
+    // Parameter 'values' must contain two distinct values
+    private static void TestArrayAsReadOnlyList<T, TArray, TArrayBuilder>(IReadOnlyList<T> values)
+        where T : struct
+        where TArray : IArrowArray
+        where TArrayBuilder : IArrowArrayBuilder, new()
+    {
+        Assert.Equal(2, values.Count);
+        TArray array = new TArrayBuilder().Append(values[0]).AppendNull().Append(values[1]).Build(default);
+        Assert.NotNull(array);
+        var readOnlyList = (IReadOnlyList<T?>)array;

         Assert.Equal(array.Length, readOnlyList.Count);
-        Assert.Equal(readOnlyList[0], 1);
-        Assert.Equal(readOnlyList[1], 2);
+        Assert.Equal(3, readOnlyList.Count);
+        Assert.Equal(values[0], readOnlyList[0]);
+        Assert.Null(readOnlyList[1]);
+        Assert.Equal(values[1], readOnlyList[2]);
+    }
+
+    [Fact]
+    public void ArrayAsCollection()
+    {
+        TestPrimitiveArrayAsCollection([1, 2, 3, 4]);
+        TestPrimitiveArrayAsCollection([1, 2, 3, 4]);
+        TestPrimitiveArrayAsCollection([true, true, true, false]);
+        TestPrimitiveArrayAsCollection([DateTime.MinValue.Date, DateTime.MaxValue.Date, DateTime.Today, DateTime.Today]);
+        TestPrimitiveArrayAsCollection([DateTime.MinValue.Date, DateTime.MaxValue.Date, DateTime.Today, DateTime.Today]);
+        TestPrimitiveArrayAsCollection([DateTimeOffset.MinValue, DateTimeOffset.MinValue.AddYears(100), DateTimeOffset.Now, DateTimeOffset.UtcNow]);
+
+#if NET5_0_OR_GREATER
+        TestPrimitiveArrayAsCollection([DateOnly.MinValue, DateOnly.MaxValue, DateOnly.FromDayNumber(1), DateOnly.FromDayNumber(2)]);
+        TestPrimitiveArrayAsCollection([DateOnly.MinValue, DateOnly.MaxValue, DateOnly.FromDayNumber(1), DateOnly.FromDayNumber(2)]);
+        TestPrimitiveArrayAsCollection([TimeOnly.MinValue, TimeOnly.MinValue.AddHours(23), TimeOnly.MinValue.AddHours(1), TimeOnly.MinValue.AddHours(2)]);
+        TestPrimitiveArrayAsCollection([TimeOnly.MinValue, TimeOnly.MaxValue, TimeOnly.MinValue.AddHours(1), TimeOnly.MinValue.AddHours(2)]);
+        TestPrimitiveArrayAsCollection([(Half)1.1, (Half)2.2f, (Half)3.3f, (Half)4.4f]);
+#endif
+
+        byte[][] byteArrs = [new byte[1], [], [255], new byte[2]];
+        TestObjectArrayAsCollection(new BinaryArray.Builder().Append(byteArrs[0].AsEnumerable()).AppendNull().Append(byteArrs[1].AsEnumerable()).Append(byteArrs[0].AsEnumerable()).Build(), System.Array.Empty<byte>(), byteArrs);
+
+        string[] strings = ["abc", "abd", "acd", "adc"];
+        TestObjectArrayAsCollection(new StringArray.Builder().Append(strings[0]).AppendNull().Append(strings[1]).Append(strings[0]).Build(), null, strings);
+    }
+
+    // Parameter 'values' must contain four values. The last value must be distinct from the rest.
+    private static void TestPrimitiveArrayAsCollection<T, TArray, TArrayBuilder>(IReadOnlyList<T> values)
+        where T : struct
+        where TArray : IArrowArray, ICollection<T?>
+        where TArrayBuilder : IArrowArrayBuilder, new()
+    {
+        Assert.Equal(4, values.Count);
+        TArray array = new TArrayBuilder().Append(values[0]).AppendNull().Append(values[1]).Append(values[0]).Build(default);
+        Assert.NotNull(array);
+        var collection = (ICollection<T?>)array;
+
+        Assert.Equal(array.Length, collection.Count);
+        Assert.Equal(4, collection.Count);
+        Assert.True(collection.IsReadOnly);
+
+        Assert.Equal("Collection is read-only.", Assert.Throws<NotSupportedException>(() => collection.Add(values[3])).Message);
+        Assert.Equal("Collection is read-only.", Assert.Throws<NotSupportedException>(() => collection.Remove(values[3])).Message);
+        Assert.Equal("Collection is read-only.", Assert.Throws<NotSupportedException>(collection.Clear).Message);
+
+        Assert.True(collection.Contains(values[0]));
+        Assert.True(collection.Contains(values[1]));
+        Assert.True(collection.Contains(default));
+        Assert.False(collection.Contains(values[3]));
+
+        T sentinel = values[2];
+        T?[] destArr = { sentinel, sentinel, sentinel, sentinel, sentinel, sentinel };
+        collection.CopyTo(destArr, 1);
+        Assert.Equal(sentinel, destArr[0]);
+        Assert.Equal(values[0], destArr[1]);
+        Assert.Null(destArr[2]);
+        Assert.Equal(values[1], destArr[3]);
+        Assert.Equal(values[0], destArr[4]);
+        Assert.Equal(sentinel, destArr[0]);
+    }
+
+    // Parameter 'values' must contain four values. The last value must be distinct from the rest.
+    private static void TestObjectArrayAsCollection<T, TArray>(TArray array, T nullValue, IReadOnlyList<T> values)
+        where T : class
+        where TArray : IArrowArray, ICollection<T>
+    {
+        Assert.NotNull(array);
+        Assert.Equal(4, values.Count);
+        var collection = (ICollection<T>)array;
+
+        Assert.Equal(array.Length, collection.Count);
+        Assert.Equal(4, collection.Count);
+        Assert.True(collection.IsReadOnly);
+
+        Assert.Equal("Collection is read-only.", Assert.Throws<NotSupportedException>(() => collection.Add(values[3])).Message);
+        Assert.Equal("Collection is read-only.", Assert.Throws<NotSupportedException>(() => collection.Remove(values[3])).Message);
+        Assert.Equal("Collection is read-only.", Assert.Throws<NotSupportedException>(collection.Clear).Message);
+
+        Assert.True(collection.Contains(values[0]));
+        Assert.True(collection.Contains(values[1]));
+        Assert.True(collection.Contains(default));
+        Assert.False(collection.Contains(values[3]));
+
+        T sentinel = values[2];
+        T[] destArr = { sentinel, sentinel, sentinel, sentinel, sentinel, sentinel };
+        collection.CopyTo(destArr, 1);
+        Assert.Equal(sentinel, destArr[0]);
+        Assert.Equal(values[0], destArr[1]);
+        Assert.Equal(nullValue, destArr[2]);
+        Assert.Equal(values[1], destArr[3]);
+        Assert.Equal(values[0], destArr[4]);
+        Assert.Equal(sentinel, destArr[0]);
+    }
+
+    [Fact]
+    public void ContainsDoesNotMatchDefaultValueInArrayWithNullValue()
+    {
+        Int64Array array = new Int64Array.Builder().Append(1).Append(2).AppendNull().Build();
+        Assert.NotNull(array);
+        var collection = (ICollection<long?>)array;
+
+        Assert.True(collection.Contains(1));
+        Assert.True(collection.Contains(2));
+        Assert.True(collection.Contains(default));
+        // A null value is stored as a null bit in the null bitmap, and a default value in the value buffer. Check that we do not match the default value.
+        Assert.False(collection.Contains(0));
     }

     [Fact]
diff --git a/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs b/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs
index 2a674b942c17b..6e4742cad06f2 100644
--- a/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs
@@ -131,7 +131,7 @@ public void AppendGivesUtcDate(DateTimeOffset dateTimeOffset)
     public class AppendDateOnly
     {
         [Theory]
-        [MemberData(nameof(GetDateOnlyData), MemberType = typeof(Date64ArrayTests))]
+        [MemberData(nameof(GetDateOnlyData), MemberType = typeof(Date32ArrayTests))]
         public void AppendDateGivesSameDate(DateOnly date)
         {
             // Arrange
diff --git a/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs
index 59080d739b10b..412f67de5f0fb 100644
--- a/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs
@@ -115,7 +115,7 @@ public void AppendTimeSpanGivesSameTimeSpan(TimeSpan? timeSpan, DurationType typ
             Assert.Equal(timeSpan, array.GetTimeSpan(0));

             IReadOnlyList<TimeSpan?> asList = array;
-            Assert.Equal(1, asList.Count);
+            Assert.Single(asList);
             Assert.Equal(timeSpan, asList[0]);
         }
     }
diff --git a/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs b/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs
index 4375c39cdfaf6..01809735d14c9 100644
--- a/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs
+++ b/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs
@@ -14,8 +14,6 @@
 // limitations under the License.
using System;
-using System.Collections.Generic;
-using System.Text;
 
 namespace Apache.Arrow.Tests
 {
diff --git a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
index 712a87a252b6c..c603ef63a4d3e 100644
--- a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
@@ -110,7 +110,7 @@ private static void CompareValue(UnionArray originalArray, int originalIndex, UnionArray slicedArray, int sliceIndex)
     }
 
     private static void CompareFieldValue<T, TArray>(byte typeId, UnionArray originalArray, int originalIndex, UnionArray slicedArray, int sliceIndex)
-        where T: struct
+        where T : struct, IEquatable<T>
         where TArray : PrimitiveArray<T>
     {
         if (originalArray is DenseUnionArray denseOriginalArray)
diff --git a/dev/README.md b/dev/README.md
index db9a10d527334..b04dd35b1c1ff 100644
--- a/dev/README.md
+++ b/dev/README.md
@@ -48,17 +48,32 @@ After installed, it runs the merge script.
 you'll have to install Python dependencies yourself and then run
 `dev/merge_arrow_pr.py` directly.)
 
+The merge script requires tokens for access control. There are two options
+for configuring your tokens: environment variables or a configuration file.
+
+> Note: Arrow only requires a GitHub token. Parquet can use GitHub or
+JIRA tokens.
+
+#### Pass tokens via environment variables
+
 The merge script uses the GitHub REST API. You must set an
-`ARROW_GITHUB_API_TOKEN` environment variable to use a
-[Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token).
+`ARROW_GITHUB_API_TOKEN` environment variable to use a
+[Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token).
 You need to add `workflow` scope to the Personal Access Token.
 
-You can specify the
+You can specify the
 [Personal Access Token](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html)
-of your JIRA account in the
+of your JIRA account in the
 `APACHE_JIRA_TOKEN` environment variable. If the variable is not set, the
 script will ask you for it.
 
+#### Pass tokens via a configuration file
+
+```
+cp ./merge.conf.sample ~/.config/arrow/merge.conf
+```
+
+Update your new `merge.conf` file with your Personal Access Tokens.
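The lookup order the README describes (environment variable first, then `merge.conf`) could be sketched as below; note that the `tokens` section and key names are hypothetical placeholders, since the actual keys live in `merge.conf.sample` and are not shown in this diff:

```python
# Hedged sketch of env-var-first token resolution; the config section and
# key names here are hypothetical, not taken from merge.conf.sample.
import configparser
import os

def resolve_token(env_name: str, conf_key: str):
    token = os.environ.get(env_name)   # 1. an environment variable wins
    if token:
        return token
    config = configparser.ConfigParser()
    config.read(os.path.expanduser("~/.config/arrow/merge.conf"))
    # 2. otherwise fall back to the configuration file, if present
    return config.get("tokens", conf_key, fallback=None)

github_token = resolve_token("ARROW_GITHUB_API_TOKEN", "github_api_token")
jira_token = resolve_token("APACHE_JIRA_TOKEN", "jira_token")
```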
+
 Example output:
 ```text
diff --git a/dev/archery/README.md b/dev/archery/README.md
index 9991e7402d832..0b9d5c743d122 100644
--- a/dev/archery/README.md
+++ b/dev/archery/README.md
@@ -23,7 +23,7 @@ Archery is documented on the Arrow website:
 
 * [Daily development using Archery](https://arrow.apache.org/docs/developers/continuous_integration/archery.html)
 * [Using Archery and Crossbow](https://arrow.apache.org/docs/developers/continuous_integration/crossbow.html)
-* [Using Archer and Docker](https://arrow.apache.org/docs/developers/continuous_integration/docker.html)
+* [Using Archery and Docker](https://arrow.apache.org/docs/developers/continuous_integration/docker.html)
 
 # Installing Archery
 
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 5fa41e28a3208..cd746f9c4499a 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -261,6 +261,7 @@ def build(ctx, src, build_dir, force, targets, **kwargs):
      "Check all sources files for license texts via Apache RAT."),
     LintCheck('r', "Lint R files."),
     LintCheck('docker', "Lint Dockerfiles with hadolint."),
+    LintCheck('docs', "Lint docs with sphinx-lint."),
 ]
 
 
@@ -285,9 +286,10 @@ def decorate_lint_command(cmd):
               help="Run IWYU on all C++ files if enabled")
 @click.option("-a", "--all", is_flag=True, default=False,
               help="Enable all checks.")
+@click.argument("path", required=False)
 @decorate_lint_command
 @click.pass_context
-def lint(ctx, src, fix, iwyu_all, **checks):
+def lint(ctx, src, fix, iwyu_all, path, **checks):
     if checks.pop('all'):
         # "--all" is given => enable all non-selected checks
         for k, v in checks.items():
@@ -297,7 +299,7 @@ def lint(ctx, src, fix, iwyu_all, **checks):
         raise click.UsageError(
             "Need to enable at least one lint check (try --help)")
     try:
-        linter(src, fix, iwyu_all=iwyu_all, **checks)
+        linter(src, fix, iwyu_all=iwyu_all, path=path, **checks)
     except LintValidationException:
         sys.exit(1)
 
@@ -736,6 +738,9 @@ def _set_default(opt, default):
               help='Include JavaScript in integration tests')
 @click.option('--with-go', type=bool, default=False,
               help='Include Go in integration tests')
+@click.option('--with-nanoarrow', type=bool, default=False,
+              help='Include nanoarrow in integration tests',
+              envvar="ARCHERY_INTEGRATION_WITH_NANOARROW")
 @click.option('--with-rust', type=bool, default=False,
               help='Include Rust in integration tests',
               envvar="ARCHERY_INTEGRATION_WITH_RUST")
@@ -774,7 +779,7 @@ def integration(with_all=False, random_seed=12345, **args):
 
     gen_path = args['write_generated_json']
 
-    languages = ['cpp', 'csharp', 'java', 'js', 'go', 'rust']
+    languages = ['cpp', 'csharp', 'java', 'js', 'go', 'nanoarrow', 'rust']
     formats = ['ipc', 'flight', 'c_data']
 
     enabled_languages = 0
diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py
index c85f1f754b997..0b5d242bbaccf 100644
--- a/dev/archery/archery/crossbow/core.py
+++ b/dev/archery/archery/crossbow/core.py
@@ -427,8 +427,14 @@ def create_branch(self, branch_name, files, parents=None, message='',
         return branch
 
     def create_tag(self, tag_name, commit_id, message=''):
+        # Prefer the new pygit2 constant name, falling back on older pygit2;
+        # getattr needs a default so the probe doesn't raise AttributeError.
+        git_object_commit = (
+            pygit2.GIT_OBJECT_COMMIT
+            if getattr(pygit2, 'GIT_OBJECT_COMMIT', None)
+            else pygit2.GIT_OBJ_COMMIT
+        )
         tag_id = self.repo.create_tag(tag_name, commit_id,
-                                      pygit2.GIT_OBJ_COMMIT, self.signature,
+                                      git_object_commit,
+                                      self.signature,
                                       message)
 
         # append to the pushable references
diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py
index 7376bb0a3b72d..cb831060022a4 100644
---
a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -371,6 +371,10 @@ def run(self, service_name, command=None, *, env=None, volumes=None, v = "{}:{}".format(v['source'], v['target']) args.extend(['-v', v]) + # append capabilities from the compose conf + for c in service.get('cap_add', []): + args.extend([f'--cap-add={c}']) + # infer whether an interactive shell is desired or not if command in ['cmd.exe', 'bash', 'sh', 'powershell']: args.append('-it') diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 5cae907a4aa71..f6302165cd5a0 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1928,17 +1928,20 @@ def _temp_path(): .skip_tester('C#') .skip_tester('Java') .skip_tester('JS') + .skip_tester('nanoarrow') .skip_tester('Rust'), generate_binary_view_case() .skip_tester('Java') .skip_tester('JS') + .skip_tester('nanoarrow') .skip_tester('Rust'), generate_list_view_case() .skip_tester('C#') # Doesn't support large list views .skip_tester('Java') .skip_tester('JS') + .skip_tester('nanoarrow') .skip_tester('Rust'), generate_extension_case() diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index 5b66842b25926..0ea244720cc1d 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -36,6 +36,7 @@ from .tester_java import JavaTester from .tester_js import JSTester from .tester_csharp import CSharpTester +from .tester_nanoarrow import NanoarrowTester from .util import guid, printer from .util import SKIP_C_ARRAY, SKIP_C_SCHEMA, SKIP_FLIGHT, SKIP_IPC from ..utils.source import ARROW_ROOT_DEFAULT @@ -541,8 +542,8 @@ def get_static_json_files(): def run_all_tests(with_cpp=True, with_java=True, with_js=True, with_csharp=True, with_go=True, with_rust=False, - run_ipc=False, run_flight=False, run_c_data=False, - tempdir=None, **kwargs): + with_nanoarrow=False, run_ipc=False, run_flight=False, + run_c_data=False, tempdir=None, **kwargs): tempdir = tempdir or tempfile.mkdtemp(prefix='arrow-integration-') testers: List[Tester] = [] @@ -562,6 +563,9 @@ def run_all_tests(with_cpp=True, with_java=True, with_js=True, if with_go: testers.append(GoTester(**kwargs)) + if with_nanoarrow: + testers.append(NanoarrowTester(**kwargs)) + if with_rust: testers.append(RustTester(**kwargs)) diff --git a/dev/archery/archery/integration/tester_csharp.py b/dev/archery/archery/integration/tester_csharp.py index 9aab5b0b28ef9..02ced0701deaf 100644 --- a/dev/archery/archery/integration/tester_csharp.py +++ b/dev/archery/archery/integration/tester_csharp.py @@ -28,7 +28,7 @@ _EXE_PATH = os.path.join(_ARTIFACTS_PATH, "Apache.Arrow.IntegrationTest", - "Debug/net7.0/Apache.Arrow.IntegrationTest", + "Debug/net8.0/Apache.Arrow.IntegrationTest", ) _clr_loaded = False @@ -44,10 +44,10 @@ def _load_clr(): import clr clr.AddReference( f"{_ARTIFACTS_PATH}/Apache.Arrow.IntegrationTest/" - f"Debug/net7.0/Apache.Arrow.IntegrationTest.dll") + f"Debug/net8.0/Apache.Arrow.IntegrationTest.dll") clr.AddReference( f"{_ARTIFACTS_PATH}/Apache.Arrow.Tests/" - f"Debug/net7.0/Apache.Arrow.Tests.dll") + f"Debug/net8.0/Apache.Arrow.Tests.dll") from Apache.Arrow.IntegrationTest import CDataInterface CDataInterface.Initialize() diff --git a/dev/archery/archery/integration/tester_java.py b/dev/archery/archery/integration/tester_java.py index 8e7a0bb99f9de..9b14c6939cde8 100644 --- 
a/dev/archery/archery/integration/tester_java.py +++ b/dev/archery/archery/integration/tester_java.py @@ -18,17 +18,23 @@ import contextlib import functools import os +from pathlib import Path import subprocess from . import cdata from .tester import Tester, CDataExporter, CDataImporter from .util import run_cmd, log -from ..utils.source import ARROW_ROOT_DEFAULT + + +ARROW_BUILD_ROOT = os.environ.get( + 'ARROW_BUILD_ROOT', + Path(__file__).resolve().parents[4] +) def load_version_from_pom(): import xml.etree.ElementTree as ET - tree = ET.parse(os.path.join(ARROW_ROOT_DEFAULT, 'java', 'pom.xml')) + tree = ET.parse(os.path.join(ARROW_BUILD_ROOT, 'java', 'pom.xml')) tag_pattern = '{http://maven.apache.org/POM/4.0.0}version' version_tag = list(tree.getroot().findall(tag_pattern))[0] return version_tag.text @@ -48,7 +54,7 @@ def load_version_from_pom(): _ARROW_TOOLS_JAR = os.environ.get( "ARROW_JAVA_INTEGRATION_JAR", os.path.join( - ARROW_ROOT_DEFAULT, + ARROW_BUILD_ROOT, "java/tools/target", f"arrow-tools-{_arrow_version}-jar-with-dependencies.jar" ) @@ -56,7 +62,7 @@ def load_version_from_pom(): _ARROW_C_DATA_JAR = os.environ.get( "ARROW_C_DATA_JAVA_INTEGRATION_JAR", os.path.join( - ARROW_ROOT_DEFAULT, + ARROW_BUILD_ROOT, "java/c/target", f"arrow-c-data-{_arrow_version}.jar" ) @@ -64,7 +70,7 @@ def load_version_from_pom(): _ARROW_FLIGHT_JAR = os.environ.get( "ARROW_FLIGHT_JAVA_INTEGRATION_JAR", os.path.join( - ARROW_ROOT_DEFAULT, + ARROW_BUILD_ROOT, "java/flight/flight-integration-tests/target", f"flight-integration-tests-{_arrow_version}-jar-with-dependencies.jar" ) diff --git a/dev/archery/archery/integration/tester_js.py b/dev/archery/archery/integration/tester_js.py index c7f363ba54687..dcf56f9a5ab6b 100644 --- a/dev/archery/archery/integration/tester_js.py +++ b/dev/archery/archery/integration/tester_js.py @@ -16,13 +16,17 @@ # under the License. import os +from pathlib import Path from .tester import Tester from .util import run_cmd, log -from ..utils.source import ARROW_ROOT_DEFAULT -ARROW_JS_ROOT = os.path.join(ARROW_ROOT_DEFAULT, 'js') +ARROW_BUILD_ROOT = os.environ.get( + 'ARROW_BUILD_ROOT', + Path(__file__).resolve().parents[4] +) +ARROW_JS_ROOT = os.path.join(ARROW_BUILD_ROOT, 'js') _EXE_PATH = os.path.join(ARROW_JS_ROOT, 'bin') _VALIDATE = os.path.join(_EXE_PATH, 'integration.ts') _JSON_TO_ARROW = os.path.join(_EXE_PATH, 'json-to-arrow.ts') diff --git a/dev/archery/archery/integration/tester_nanoarrow.py b/dev/archery/archery/integration/tester_nanoarrow.py new file mode 100644 index 0000000000000..30ff1bb6e50a7 --- /dev/null +++ b/dev/archery/archery/integration/tester_nanoarrow.py @@ -0,0 +1,148 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import functools +import os + +from . 
import cdata +from .tester import Tester, CDataExporter, CDataImporter +from ..utils.source import ARROW_ROOT_DEFAULT + + +_NANOARROW_PATH = os.environ.get( + "ARROW_NANOARROW_PATH", + os.path.join(ARROW_ROOT_DEFAULT, "nanoarrow/cdata"), +) + +_INTEGRATION_DLL = os.path.join( + _NANOARROW_PATH, "libnanoarrow_c_data_integration" + cdata.dll_suffix +) + + +class NanoarrowTester(Tester): + PRODUCER = False + CONSUMER = False + FLIGHT_SERVER = False + FLIGHT_CLIENT = False + C_DATA_SCHEMA_EXPORTER = True + C_DATA_ARRAY_EXPORTER = True + C_DATA_SCHEMA_IMPORTER = True + C_DATA_ARRAY_IMPORTER = True + + name = "nanoarrow" + + def validate(self, json_path, arrow_path, quirks=None): + raise NotImplementedError() + + def json_to_file(self, json_path, arrow_path): + raise NotImplementedError() + + def stream_to_file(self, stream_path, file_path): + raise NotImplementedError() + + def file_to_stream(self, file_path, stream_path): + raise NotImplementedError() + + def make_c_data_exporter(self): + return NanoarrowCDataExporter(self.debug, self.args) + + def make_c_data_importer(self): + return NanoarrowCDataImporter(self.debug, self.args) + + +_nanoarrow_c_data_entrypoints = """ + const char* nanoarrow_CDataIntegration_ExportSchemaFromJson( + const char* json_path, struct ArrowSchema* out); + + const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson( + const char* json_path, struct ArrowSchema* schema); + + const char* nanoarrow_CDataIntegration_ExportBatchFromJson( + const char* json_path, int num_batch, struct ArrowArray* out); + + const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson( + const char* json_path, int num_batch, struct ArrowArray* batch); + + int64_t nanoarrow_BytesAllocated(void); + """ + + +@functools.lru_cache +def _load_ffi(ffi, lib_path=_INTEGRATION_DLL): + ffi.cdef(_nanoarrow_c_data_entrypoints) + dll = ffi.dlopen(lib_path) + return dll + + +class _CDataBase: + def __init__(self, debug, args): + self.debug = debug + self.args = args + self.ffi = cdata.ffi() + self.dll = _load_ffi(self.ffi) + + def _check_nanoarrow_error(self, na_error): + """ + Check a `const char*` error return from an integration entrypoint. + + A null means success, a non-empty string is an error message. + The string is statically allocated on the nanoarrow side and does not + need to be released. 
+ """ + assert self.ffi.typeof(na_error) is self.ffi.typeof("const char*") + if na_error != self.ffi.NULL: + error = self.ffi.string(na_error).decode("utf8", errors="replace") + raise RuntimeError(f"nanoarrow C Data Integration call failed: {error}") + + +class NanoarrowCDataExporter(CDataExporter, _CDataBase): + def export_schema_from_json(self, json_path, c_schema_ptr): + na_error = self.dll.nanoarrow_CDataIntegration_ExportSchemaFromJson( + str(json_path).encode(), c_schema_ptr + ) + self._check_nanoarrow_error(na_error) + + def export_batch_from_json(self, json_path, num_batch, c_array_ptr): + na_error = self.dll.nanoarrow_CDataIntegration_ExportBatchFromJson( + str(json_path).encode(), num_batch, c_array_ptr + ) + self._check_nanoarrow_error(na_error) + + @property + def supports_releasing_memory(self): + return True + + def record_allocation_state(self): + return self.dll.nanoarrow_BytesAllocated() + + +class NanoarrowCDataImporter(CDataImporter, _CDataBase): + def import_schema_and_compare_to_json(self, json_path, c_schema_ptr): + na_error = self.dll.nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson( + str(json_path).encode(), c_schema_ptr + ) + self._check_nanoarrow_error(na_error) + + def import_batch_and_compare_to_json(self, json_path, num_batch, c_array_ptr): + na_error = self.dll.nanoarrow_CDataIntegration_ImportBatchAndCompareToJson( + str(json_path).encode(), num_batch, c_array_ptr + ) + self._check_nanoarrow_error(na_error) + + @property + def supports_releasing_memory(self): + return True diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py index 15f22ca2e6e5c..c9d05fffd9168 100644 --- a/dev/archery/archery/utils/lint.py +++ b/dev/archery/archery/utils/lint.py @@ -157,13 +157,13 @@ def cmake_linter(src, fix=False): 'go/**/CMakeLists.txt', 'java/**/CMakeLists.txt', 'matlab/**/CMakeLists.txt', - 'python/CMakeLists.txt', + 'python/**/CMakeLists.txt', ], exclude_patterns=[ 'cpp/cmake_modules/FindNumPy.cmake', 'cpp/cmake_modules/FindPythonLibsNew.cmake', 'cpp/cmake_modules/UseCython.cmake', - 'cpp/src/arrow/util/config.h.cmake', + 'cpp/src/arrow/util/*.h.cmake', ] ) method = cmake_format.fix if fix else cmake_format.check @@ -436,10 +436,55 @@ def docker_linter(src): cwd=src.path)) -def linter(src, fix=False, *, clang_format=False, cpplint=False, +class SphinxLint(Command): + def __init__(self, src, path=None, sphinx_lint_bin=None, disable=None, enable=None): + self.src = src + self.path = path + self.bin = default_bin(sphinx_lint_bin, "sphinx-lint") + self.disable = disable or "all" + self.enable = enable + + def lint(self, *args, check=False): + docs_path = os.path.join(self.src.path, "docs") + + args = [] + + if self.disable: + args.extend(["--disable", self.disable]) + + if self.enable: + args.extend(["--enable", self.enable]) + + if self.path is not None: + args.extend([self.path]) + else: + args.extend([docs_path]) + + return self.run(*args, check=check) + + +def docs_linter(src, path=None): + """Run sphinx-lint on docs.""" + logger.info("Running docs linter (sphinx-lint)") + + sphinx_lint = SphinxLint( + src, + path=path, + disable="all", + enable="trailing-whitespace,missing-final-newline" + ) + + if not sphinx_lint.available: + logger.error("sphinx-lint linter requested but sphinx-lint binary not found") + return + + yield LintResult.from_cmd(sphinx_lint.lint()) + + +def linter(src, fix=False, path=None, *, clang_format=False, cpplint=False, clang_tidy=False, iwyu=False, iwyu_all=False, python=False, numpydoc=False, 
cmake_format=False, rat=False, - r=False, docker=False): + r=False, docker=False, docs=False): """Run all linters.""" with tmpdir(prefix="arrow-lint-") as root: build_dir = os.path.join(root, "cpp-build") @@ -481,6 +526,9 @@ def linter(src, fix=False, *, clang_format=False, cpplint=False, if docker: results.extend(docker_linter(src)) + if docs: + results.extend(docs_linter(src, path)) + # Raise error if one linter failed, ensuring calling code can exit with # non-zero. for result in results: diff --git a/dev/archery/setup.py b/dev/archery/setup.py index 23a1600910d04..cd3e2e9ca0834 100755 --- a/dev/archery/setup.py +++ b/dev/archery/setup.py @@ -41,7 +41,7 @@ 'integration': ['cffi'], 'integration-java': ['jpype1'], 'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8==6.1.0', 'cython-lint', - 'cmake_format==0.6.13'], + 'cmake_format==0.6.13', 'sphinx-lint==0.9.1'], 'numpydoc': ['numpydoc==1.1.0'], 'release': ['pygithub', jinja_req, 'jira', 'semver', 'gitpython'], } diff --git a/dev/conbench_envs/benchmarks.env b/dev/conbench_envs/benchmarks.env index 2a5a9c32a86ec..3af29491a8345 100644 --- a/dev/conbench_envs/benchmarks.env +++ b/dev/conbench_envs/benchmarks.env @@ -31,7 +31,6 @@ ARROW_HOME=$CONDA_PREFIX ARROW_INSTALL_NAME_RPATH=ON ARROW_JEMALLOC=OFF ARROW_MIMALLOC=ON -ARROW_NO_DEPRECATED_API=ON ARROW_ORC=ON ARROW_PARQUET=ON ARROW_PYTHON=ON diff --git a/dev/conbench_envs/hooks.sh b/dev/conbench_envs/hooks.sh index a77189764aed3..0745357d2c0d3 100755 --- a/dev/conbench_envs/hooks.sh +++ b/dev/conbench_envs/hooks.sh @@ -59,7 +59,8 @@ build_arrow_cpp() { } build_arrow_python() { - ci/scripts/python_build.sh $(pwd) $(pwd) + mkdir -p /tmp/arrow + ci/scripts/python_build.sh $(pwd) /tmp/arrow } build_arrow_r() { @@ -69,7 +70,8 @@ build_arrow_r() { } build_arrow_java() { - ci/scripts/java_build.sh $(pwd) $(pwd) + mkdir -p /tmp/arrow + ci/scripts/java_build.sh $(pwd) /tmp/arrow } install_archery() { diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 25d3372d8b4d3..344d943fd87e1 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -306,15 +306,11 @@ def version_tuple(x): # Only suggest versions starting with a number, like 0.x but not JS-0.x mainline_versions = all_versions - mainline_non_patch_versions = [] - for v in mainline_versions: - (major, minor, patch) = v.split(".") - if patch == "0": - mainline_non_patch_versions.append(v) - - if len(mainline_versions) > len(mainline_non_patch_versions): - # If there is a non-patch release, suggest that instead - mainline_versions = mainline_non_patch_versions + major_versions = [v for v in mainline_versions if v.endswith('.0.0')] + + if len(mainline_versions) > len(major_versions): + # If there is a future major release, suggest that + mainline_versions = major_versions mainline_versions = [v for v in mainline_versions if f"maint-{v}" not in maintenance_branches] diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index 8fb23f45f0f3a..fbd0b2996077c 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -51,6 +51,45 @@ def prepare(*targets) sh(env, "dev/release/01-prepare.sh", @release_version, @next_version, "0") end + data(:release_type, [nil, :major, :minor, :patch]) + def test_deb_package_names + omit_on_release_branch + current_commit = git_current_commit + stdout = prepare("DEB_PACKAGE_NAMES") + changes = parse_patch(git("log", "-p", "#{current_commit}..")) + sampled_changes = changes.collect do |change| + first_hunk = change[:hunks][0] + first_removed_line = first_hunk.find 
{ |line| line.start_with?("-") } + first_added_line = first_hunk.find { |line| line.start_with?("+") } + { + sampled_diff: [first_removed_line, first_added_line], + path: change[:path], + } + end + case release_type + when :major, :minor + expected_changes = [ + { + sampled_diff: [ + "-Package: libarrow#{@snapshot_so_version}", + "+Package: libarrow#{@so_version}", + ], + path: "dev/tasks/linux-packages/apache-arrow/debian/control.in", + }, + { + sampled_diff: [ + "- - libarrow-acero#{@snapshot_so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb", + "+ - libarrow-acero#{@so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb", + ], + path: "dev/tasks/tasks.yml", + }, + ] + else + expected_changes = [] + end + assert_equal(expected_changes, sampled_changes, "Output:\n#{stdout}") + end + def test_linux_packages user = "Arrow Developers" email = "dev@arrow.apache.org" @@ -96,7 +135,7 @@ def test_linux_packages assert_equal(expected_changes, sampled_changes, "Output:\n#{stdout}") end - data(:release_type, [:major, :minor, :patch]) + data(:next_release_type, [:major, :minor, :patch]) def test_version_pre_tag omit_on_release_branch @@ -108,6 +147,13 @@ def test_version_pre_tag "+version = '#{@release_version}'"], ], }, + { + path: "c_glib/vcpkg.json", + hunks: [ + ["- \"version-string\": \"#{@snapshot_version}\",", + "+ \"version-string\": \"#{@release_version}\","], + ], + }, { path: "ci/scripts/PKGBUILD", hunks: [ @@ -151,7 +197,7 @@ def test_version_pre_tag ], }, ] - unless release_type == :patch + unless next_release_type == :patch expected_changes += [ { path: "docs/source/_static/versions.json", @@ -208,10 +254,10 @@ def test_version_pre_tag ], }, { - path: "python/setup.py", + path: "python/pyproject.toml", hunks: [ - ["-default_version = '#{@snapshot_version}'", - "+default_version = '#{@release_version}'"], + ["-fallback_version = '#{@release_version}a0'", + "+fallback_version = '#{@release_version}'"], ], }, { @@ -229,7 +275,7 @@ def test_version_pre_tag ], }, ] - if release_type == :major + if next_release_type == :major expected_changes += [ { path: "r/pkgdown/assets/versions.json", diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh index 01fa2f3d80345..e4c62e6323c23 100755 --- a/dev/release/01-prepare.sh +++ b/dev/release/01-prepare.sh @@ -39,6 +39,7 @@ release_candidate_branch="release-${version}-rc${rc_number}" : ${PREPARE_DEFAULT:=1} : ${PREPARE_CHANGELOG:=${PREPARE_DEFAULT}} +: ${PREPARE_DEB_PACKAGE_NAMES:=${PREPARE_DEFAULT}} : ${PREPARE_LINUX_PACKAGES:=${PREPARE_DEFAULT}} : ${PREPARE_VERSION_PRE_TAG:=${PREPARE_DEFAULT}} : ${PREPARE_BRANCH:=${PREPARE_DEFAULT}} @@ -78,16 +79,12 @@ if [ ${PREPARE_CHANGELOG} -gt 0 ]; then git commit -m "MINOR: [Release] Update CHANGELOG.md for $version" fi +if [ ${PREPARE_DEB_PACKAGE_NAMES} -gt 0 ]; then + update_deb_package_names "$(current_version)" "${version}" +fi + if [ ${PREPARE_LINUX_PACKAGES} -gt 0 ]; then - echo "Updating .deb/.rpm changelogs for $version" - cd $SOURCE_DIR/../tasks/linux-packages - rake \ - version:update \ - ARROW_RELEASE_TIME="$(date +%Y-%m-%dT%H:%M:%S%z)" \ - ARROW_VERSION=${version} - git add */debian*/changelog */yum/*.spec.in - git commit -m "MINOR: [Release] Update .deb/.rpm changelogs for $version" - cd - + update_linux_packages "${version}" "$(date +%Y-%m-%dT%H:%M:%S%z)" fi if [ ${PREPARE_VERSION_PRE_TAG} -gt 0 ]; then diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index 8fcdcf1f5f442..c2386a1f52f21 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ 
-1083,7 +1083,6 @@ def apt_release_repositories_dir def available_apt_targets [ - ["debian", "bullseye", "main"], ["debian", "bookworm", "main"], ["debian", "trixie", "main"], ["ubuntu", "focal", "main"], @@ -2111,8 +2110,6 @@ def apt_test_targets_default # Disable arm64 targets by default for now # because they require some setups on host. [ - "debian-bullseye", - # "debian-bullseye-arm64", "debian-bookworm", # "debian-bookworm-arm64", "debian-trixie", diff --git a/dev/release/post-08-docs.sh b/dev/release/post-08-docs.sh index 1e457c95c033c..58a462551f199 100755 --- a/dev/release/post-08-docs.sh +++ b/dev/release/post-08-docs.sh @@ -72,14 +72,28 @@ fi # delete current stable docs and restore all previous versioned docs rm -rf docs/* git checkout "${versioned_paths[@]}" +# Download and untar released docs in a temp folder +rm -rf docs_new +mkdir docs_new +pushd docs_new curl \ --fail \ --location \ --remote-name \ https://apache.jfrog.io/artifactory/arrow/docs/${version}/docs.tar.gz tar xvf docs.tar.gz -rm -f docs.tar.gz -git checkout docs/c_glib/index.html +# Update DOCUMENTATION_OPTIONS.show_version_warning_banner +find docs \ + -type f \ + -exec \ + sed -i.bak \ + -e "s/DOCUMENTATION_OPTIONS.show_version_warning_banner = true/DOCUMENTATION_OPTIONS.show_version_warning_banner = false/g" \ + {} \; +find ./ -name '*.bak' -delete +popd +mv docs_new/docs/* docs/ +rm -rf docs_new + if [ "$is_major_release" = "yes" ] ; then previous_series=${previous_version%.*} mv docs_temp docs/${previous_series} diff --git a/dev/release/post-11-bump-versions-test.rb b/dev/release/post-11-bump-versions-test.rb index 78d9320bfb312..8ad404ef33202 100644 --- a/dev/release/post-11-bump-versions-test.rb +++ b/dev/release/post-11-bump-versions-test.rb @@ -74,7 +74,7 @@ def bump_versions(*targets) end end - data(:release_type, [:major, :minor, :patch]) + data(:next_release_type, [:major, :minor, :patch]) def test_version_post_tag omit_on_release_branch @@ -86,6 +86,13 @@ def test_version_post_tag "+version = '#{@next_snapshot_version}'"], ], }, + { + path: "c_glib/vcpkg.json", + hunks: [ + ["- \"version-string\": \"#{@snapshot_version}\",", + "+ \"version-string\": \"#{@next_snapshot_version}\","], + ], + }, { path: "ci/scripts/PKGBUILD", hunks: [ @@ -129,7 +136,7 @@ def test_version_post_tag ], }, ] - unless release_type == :patch + unless next_release_type == :patch expected_changes += [ { path: "docs/source/_static/versions.json", @@ -172,10 +179,10 @@ def test_version_post_tag ], }, { - path: "python/setup.py", + path: "python/pyproject.toml", hunks: [ - ["-default_version = '#{@snapshot_version}'", - "+default_version = '#{@next_snapshot_version}'"], + ["-fallback_version = '#{@release_version}a0'", + "+fallback_version = '#{@next_version}a0'"], ], }, { @@ -195,8 +202,14 @@ def test_version_post_tag ], }, ] - if release_type == :major + if next_release_type == :major expected_changes += [ + { + path: "c_glib/tool/generate-version-header.py", + hunks: [ + ["+ (#{@next_major_version}, 0),"], + ], + }, { path: "docs/source/index.rst", hunks: [ @@ -263,7 +276,7 @@ def test_version_post_tag import_path = "github.com/apache/arrow/go/v#{@snapshot_major_version}" hunks = [] - if release_type == :major + if next_release_type == :major lines = File.readlines(path, chomp: true) target_lines = lines.each_with_index.select do |line, i| line.include?(import_path) diff --git a/dev/release/post-11-bump-versions.sh b/dev/release/post-11-bump-versions.sh index 93eb15e0921c8..422821a66bde5 100755 --- 
a/dev/release/post-11-bump-versions.sh +++ b/dev/release/post-11-bump-versions.sh @@ -41,10 +41,6 @@ version=$1 next_version=$2 next_version_snapshot="${next_version}-SNAPSHOT" -current_version=$(grep ARROW_VERSION "${SOURCE_DIR}/../../cpp/CMakeLists.txt" | \ - head -n1 | \ - grep -E -o '([0-9]+\.[0-9]+\.[0-9]+)') - case "${version}" in *.0.0) is_major_release=1 @@ -68,52 +64,12 @@ if [ ${BUMP_VERSION_POST_TAG} -gt 0 ]; then fi if [ ${BUMP_DEB_PACKAGE_NAMES} -gt 0 ] && \ - [ "${next_version}" != "${current_version}" ]; then - echo "Updating .deb package names for ${next_version}" - so_version() { - local version=$1 - local major_version=$(echo $version | sed -E -e 's/^([0-9]+)\.[0-9]+\.[0-9]+$/\1/') - local minor_version=$(echo $version | sed -E -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') - expr ${major_version} \* 100 + ${minor_version} - } - deb_lib_suffix=$(so_version $version) - next_deb_lib_suffix=$(so_version $next_version) - if [ "${deb_lib_suffix}" != "${next_deb_lib_suffix}" ]; then - cd $SOURCE_DIR/../tasks/linux-packages/apache-arrow - for target in debian*/lib*${deb_lib_suffix}.install; do - git mv \ - ${target} \ - $(echo $target | sed -e "s/${deb_lib_suffix}/${next_deb_lib_suffix}/") - done - deb_lib_suffix_substitute_pattern="s/(lib(arrow|gandiva|parquet)[-a-z]*)${deb_lib_suffix}/\\1${next_deb_lib_suffix}/g" - sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" debian*/control* - rm -f debian*/control*.bak - git add debian*/control* - cd - - cd $SOURCE_DIR/../tasks/ - sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" tasks.yml - rm -f tasks.yml.bak - git add tasks.yml - cd - - cd $SOURCE_DIR - sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" rat_exclude_files.txt - rm -f rat_exclude_files.txt.bak - git add rat_exclude_files.txt - git commit -m "MINOR: [Release] Update .deb package names for $next_version" - cd - - fi + [ "${next_version}" != "$(current_version)" ]; then + update_deb_package_names "${version}" "${next_version}" fi if [ ${BUMP_LINUX_PACKAGES} -gt 0 ]; then - echo "Updating .deb/.rpm changelogs for $version" - cd $SOURCE_DIR/../tasks/linux-packages - rake \ - version:update \ - ARROW_RELEASE_TIME="$(git log -n1 --format=%aI apache-arrow-${version})" \ - ARROW_VERSION=${version} - git add */debian*/changelog */yum/*.spec.in - git commit -m "MINOR: [Release] Update .deb/.rpm changelogs for $version" - cd - + update_linux_packages "${version}" "$(git log -n1 --format=%aI apache-arrow-${version})" fi if [ ${BUMP_PUSH} -gt 0 ]; then diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index f4d7b411c4dc2..ef325090f2f4b 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -63,6 +63,7 @@ go.work.sum go/go.sum go/arrow/Gopkg.lock go/arrow/flight/gen/flight/*.pb.go +go/arrow/util/util_message/*.pb.go go/arrow/internal/cpu/* go/arrow/type_string.go go/arrow/cdata/test/go.sum @@ -150,3 +151,4 @@ r/tools/nixlibs-allowlist.txt ruby/red-arrow/.yardopts .github/pull_request_template.md swift/data-generator/swift-datagen/go.sum +swift/CDataWGo/go.sum diff --git a/dev/release/test-helper.rb b/dev/release/test-helper.rb index 3b2c3aa6e5874..82400bae2793b 100644 --- a/dev/release/test-helper.rb +++ b/dev/release/test-helper.rb @@ -96,7 +96,11 @@ def parse_patch(patch) module VersionDetectable def release_type - (data || {})[:release_type] || :major + (data || {})[:release_type] + end + + def next_release_type + (data || {})[:next_release_type] || :major end def detect_versions @@ -104,19 +108,36 @@ def 
detect_versions
     cpp_cmake_lists = top_dir + "cpp" + "CMakeLists.txt"
     @snapshot_version = cpp_cmake_lists.read[/ARROW_VERSION "(.+?)"/, 1]
     @snapshot_major_version = @snapshot_version.split(".")[0]
-    @release_version = @snapshot_version.gsub(/-SNAPSHOT\z/, "")
+    @snapshot_so_version = compute_so_version(@snapshot_version.split("-")[0])
+    release_version = @snapshot_version.gsub(/-SNAPSHOT\z/, "")
+    release_version_components = release_version.split(".")
+    case release_type
+    when nil
+    when :major
+      release_version_components[0].succ!
+    when :minor
+      release_version_components[1].succ!
+    when :patch
+      release_version_components[2].succ!
+    else
+      raise "unknown release type: #{release_type.inspect}"
+    end
+    @release_version = release_version_components.join(".")
     @release_compatible_version = @release_version.split(".")[0, 2].join(".")
     @so_version = compute_so_version(@release_version)
     next_version_components = @release_version.split(".")
-    case release_type
+    case next_release_type
     when :major
       next_version_components[0].succ!
+      next_version_components[1] = 0
+      next_version_components[2] = 0
     when :minor
       next_version_components[1].succ!
+      next_version_components[2] = 0
     when :patch
       next_version_components[2].succ!
     else
-      raise "unknown release type: #{release_type.inspect}"
+      raise "unknown next release type: #{next_release_type.inspect}"
     end
     @next_version = next_version_components.join(".")
     @next_major_version = @next_version.split(".")[0]
diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh
index 51367087228a4..c255e728a335b 100644
--- a/dev/release/utils-prepare.sh
+++ b/dev/release/utils-prepare.sh
@@ -26,10 +26,12 @@ update_versions() {
     release)
       local version=${base_version}
       local r_version=${base_version}
+      local python_version=${base_version}
       ;;
     snapshot)
       local version=${next_version}-SNAPSHOT
       local r_version=${base_version}.9000
+      local python_version=${next_version}a0
       ;;
   esac
   local major_version=${version%%.*}
@@ -40,6 +42,22 @@
     meson.build
   rm -f meson.build.bak
   git add meson.build
+
+  # Add a new version entry only when the next release is a new major release
+  if [ "${type}" = "snapshot" -a \
+       "${next_version}" = "${major_version}.0.0" ]; then
+    sed -i.bak -E -e \
+      "s/^ALL_VERSIONS = \[$/&\\n    (${major_version}, 0),/" \
+      tool/generate-version-header.py
+    rm -f tool/generate-version-header.py.bak
+    git add tool/generate-version-header.py
+  fi
+
+  sed -i.bak -E -e \
+    "s/\"version-string\": \".+\"/\"version-string\": \"${version}\"/" \
+    vcpkg.json
+  rm -f vcpkg.json.bak
+  git add vcpkg.json
   popd
 
   pushd "${ARROW_DIR}/ci/scripts"
@@ -110,10 +128,10 @@
   pushd "${ARROW_DIR}/python"
   sed -i.bak -E -e \
-    "s/^default_version = '.+'/default_version = '${version}'/" \
-    setup.py
-  rm -f setup.py.bak
-  git add setup.py
+    "s/^fallback_version = '.+'/fallback_version = '${python_version}'/" \
+    pyproject.toml
+  rm -f pyproject.toml.bak
+  git add pyproject.toml
   sed -i.bak -E -e \
     "s/^set\(PYARROW_VERSION \".+\"\)/set(PYARROW_VERSION \"${version}\")/" \
     CMakeLists.txt
@@ -127,7 +145,7 @@
     DESCRIPTION
   rm -f DESCRIPTION.bak
   git add DESCRIPTION
-
+
   # Replace dev version with release version
   sed -i.bak -E -e \
    "/^c1{{Send #60;want_data#gt; Msg}}
+subgraph meta [Meta Message]
+  direction LR
+  m1[/Msg Type #40;byte 0#41;<br/>Seq Num #40;bytes 1-5#41;/]-- type 1 -->m2[[Process IPC Header]]
+  m2-- IPC has body -->m3[Get Corresponding<br/>Tagged Msg]
+  m2-- Schema Msg -->m4[/Store Schema/]
+  m1-- type 0 -->e[Indicate End of Stream]
+end
+subgraph data [Data Stream]
+  direction LR
+  d1[Request Msg<br/>for Seq Num]-->d2{Most Significant<br/>Byte}
+  d2-- 0 -->d3[Construct from<br/>Metadata and Body]
+  d2-- 1 -->d4[Get shared/remote<br/>buffers]
+  d4 -->d5[Construct from<br/>Metadata and buffers]
+  d3 & d5 -->e2[Output Batch]
+end
+
+client -- recv untagged msg --> meta
+client -- get tagged msg --> data
diff --git a/docs/source/format/DissociatedIPC/SequenceDiagramSame.mmd b/docs/source/format/DissociatedIPC/SequenceDiagramSame.mmd
new file mode 100644
index 0000000000000..adf26bdc32767
--- /dev/null
+++ b/docs/source/format/DissociatedIPC/SequenceDiagramSame.mmd
@@ -0,0 +1,43 @@
+%% Licensed to the Apache Software Foundation (ASF) under one
+%% or more contributor license agreements. See the NOTICE file
+%% distributed with this work for additional information
+%% regarding copyright ownership. The ASF licenses this file
+%% to you under the Apache License, Version 2.0 (the
+%% "License"); you may not use this file except in compliance
+%% with the License. You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+
+sequenceDiagram
+    participant C as Client
+    participant S as Server
+    activate C
+    C-->>+S: TaggedMessage(server.want_data, bytes=ID_of_desired_data)
+    S-->>C: Message(bytes([1]) + le_bytes(sequence_number) + schema_metadata)
+    par
+        loop each chunk
+            S-->>C: Message(bytes([1]) + le_bytes(sequence_number) + batch_metadata)
+        end
+        S-->>C: Message(bytes([0]) + le_bytes(sequence_number))
+    and
+        loop each chunk
+            alt
+                S-->>C: TaggedMessage((bytes[0] << 55) | le_bytes(sequence_number), bytes=batch_data)
+            else
+                S-->>C: TaggedMessage((bytes[1] << 55) | le_bytes(sequence_number), bytes=uint64_pairs)
+            end
+        end
+    end
+
+    loop
+        C-->>S: TaggedMessage(server.free_data, bytes=uint64_list)
+    end
+    deactivate S
+    deactivate C
diff --git a/docs/source/format/DissociatedIPC/SequenceDiagramSeparate.mmd b/docs/source/format/DissociatedIPC/SequenceDiagramSeparate.mmd
new file mode 100644
index 0000000000000..11d2d9d6387eb
--- /dev/null
+++ b/docs/source/format/DissociatedIPC/SequenceDiagramSeparate.mmd
@@ -0,0 +1,44 @@
+%% Licensed to the Apache Software Foundation (ASF) under one
+%% or more contributor license agreements. See the NOTICE file
+%% distributed with this work for additional information
+%% regarding copyright ownership. The ASF licenses this file
+%% to you under the Apache License, Version 2.0 (the
+%% "License"); you may not use this file except in compliance
+%% with the License. You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+
+sequenceDiagram
+    participant D as Data Stream
+    participant C as Client
+    participant M as Metadata Stream
+
+    activate C
+    C-->>+M: TaggedMessage(server.want_data, bytes=ID_of_desired_data)
+    C-->>+D: TaggedMessage(server.want_data, bytes=ID_of_desired_data)
+    M-->>C: Message(bytes([1]) + le_bytes(sequence_number) + schema_metadata)
+    loop each batch
+        par
+            M-->>C: Message(bytes([1]) + le_bytes(sequence_number) + batch_metadata)
+        and
+            alt
+                D-->>C: TaggedMessage((bytes[0] << 55) | le_bytes(sequence_number), bytes=batch_data)
+            else
+                D-->>C: TaggedMessage((bytes[1] << 55) | le_bytes(sequence_number),
bytes=uint64_pairs) + end + end + end + M-->>C: Message(bytes([0]) + le_bytes(sequence_number)) + deactivate M + loop + C-->>D: TaggedMessage(server.free_data, bytes=uint64_list) + end + deactivate D + deactivate C diff --git a/docs/source/format/Flight.rst b/docs/source/format/Flight.rst index 7ee84952b4350..2c5487d857ea4 100644 --- a/docs/source/format/Flight.rst +++ b/docs/source/format/Flight.rst @@ -68,9 +68,8 @@ Downloading Data A client that wishes to download the data would: -.. figure:: ./Flight/DoGet.mmd.svg - - Retrieving data via ``DoGet``. +.. mermaid:: ./Flight/DoGet.mmd + :caption: Retrieving data via ``DoGet``. #. Construct or acquire a ``FlightDescriptor`` for the data set they are interested in. @@ -168,9 +167,8 @@ data. However, ``GetFlightInfo`` doesn't return until the query completes, so the client is blocked. In this situation, the client can use ``PollFlightInfo`` instead of ``GetFlightInfo``: -.. figure:: ./Flight/PollFlightInfo.mmd.svg - - Polling a long-running query by ``PollFlightInfo``. +.. mermaid:: ./Flight/PollFlightInfo.mmd + :caption: Polling a long-running query by ``PollFlightInfo``. #. Construct or acquire a ``FlightDescriptor``, as before. #. Call ``PollFlightInfo(FlightDescriptor)`` to get a ``PollInfo`` @@ -229,9 +227,8 @@ Uploading Data To upload data, a client would: -.. figure:: ./Flight/DoPut.mmd.svg - - Uploading data via ``DoPut``. +.. mermaid:: ./Flight/DoPut.mmd + :caption: Uploading data via ``DoPut``. #. Construct or acquire a ``FlightDescriptor``, as before. #. Call ``DoPut(FlightData)`` and upload a stream of Arrow record @@ -257,9 +254,8 @@ require being stateful if implemented using ``DoGet`` and ``DoPut``. Instead, ``DoExchange`` allows this to be implemented as a single call. A client would: -.. figure:: ./Flight/DoExchange.mmd.svg - - Complex data flow with ``DoExchange``. +.. mermaid:: ./Flight/DoExchange.mmd + :caption: Complex data flow with ``DoExchange``. #. Construct or acquire a ``FlightDescriptor``, as before. #. Call ``DoExchange(FlightData)``. @@ -314,6 +310,8 @@ well, in which case any `authentication method supported by gRPC .. _Mutual TLS (mTLS): https://grpc.io/docs/guides/auth/#supported-auth-mechanisms +.. _flight-location-uris: + Location URIs ============= diff --git a/docs/source/format/Flight/DoExchange.mmd b/docs/source/format/Flight/DoExchange.mmd index 14f1789aeaaa7..f7586bf35eb4f 100644 --- a/docs/source/format/Flight/DoExchange.mmd +++ b/docs/source/format/Flight/DoExchange.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandGetTables.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/Flight/DoExchange.mmd.svg b/docs/source/format/Flight/DoExchange.mmd.svg deleted file mode 100644 index 204d63d77218d..0000000000000 --- a/docs/source/format/Flight/DoExchange.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientServerThe first FlightData includes a FlightDescriptorDoExchange(FlightData)1stream of FlightData2stream of FlightData3par[[Client sends data]][[Server sends data]]ClientServer \ No newline at end of file diff --git a/docs/source/format/Flight/DoGet.mmd b/docs/source/format/Flight/DoGet.mmd index c2e3cd034448c..cac59afb8219f 100644 --- a/docs/source/format/Flight/DoGet.mmd +++ b/docs/source/format/Flight/DoGet.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. 
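To make the ``DoGet`` flow from the Flight.rst changes above concrete, here is a minimal client-side sketch using pyarrow.flight (an illustration, not part of this diff; the location and descriptor path are placeholders):

```python
# Sketch of the DoGet flow: GetFlightInfo returns endpoints, and each
# endpoint's ticket is redeemed with DoGet (possibly in parallel, possibly
# against a different server listed in endpoint.locations).
import pyarrow.flight as flight

client = flight.FlightClient("grpc://localhost:8815")        # placeholder
descriptor = flight.FlightDescriptor.for_path("example.parquet")

info = client.get_flight_info(descriptor)
tables = []
for endpoint in info.endpoints:
    reader = client.do_get(endpoint.ticket)
    tables.append(reader.read_all())      # one Arrow Table per endpoint
```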
-%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandGetTables.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/Flight/DoGet.mmd.svg b/docs/source/format/Flight/DoGet.mmd.svg deleted file mode 100644 index 48a50d77ed33f..0000000000000 --- a/docs/source/format/Flight/DoGet.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientMetadata ServerData ServerGetFlightInfo(FlightDescriptor)1FlightInfo{endpoints: [FlightEndpoint{ticket: Ticket}, …]}2This may be parallelizedDoGet(Ticket)3stream of FlightData4loop[for each endpoint in FlightInfo.endpoints]ClientMetadata ServerData Server \ No newline at end of file diff --git a/docs/source/format/Flight/DoPut.mmd b/docs/source/format/Flight/DoPut.mmd index 5845edef1f466..876505da2d300 100644 --- a/docs/source/format/Flight/DoPut.mmd +++ b/docs/source/format/Flight/DoPut.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandGetTables.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/Flight/DoPut.mmd.svg b/docs/source/format/Flight/DoPut.mmd.svg deleted file mode 100644 index 9e490e152bdb3..0000000000000 --- a/docs/source/format/Flight/DoPut.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientServerThe first FlightData includes a FlightDescriptorDoPut(FlightData)1stream of FlightData2PutResult{app_metadata}3ClientServer \ No newline at end of file diff --git a/docs/source/format/Flight/PollFlightInfo.mmd b/docs/source/format/Flight/PollFlightInfo.mmd index d062a3a216958..f91c077b655c0 100644 --- a/docs/source/format/Flight/PollFlightInfo.mmd +++ b/docs/source/format/Flight/PollFlightInfo.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd):/data minlag/mermaid-cli -i /data/PollFlightInfo.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/Flight/PollFlightInfo.mmd.svg b/docs/source/format/Flight/PollFlightInfo.mmd.svg deleted file mode 100644 index 1890361f88ce4..0000000000000 --- a/docs/source/format/Flight/PollFlightInfo.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientMetadata ServerData ServerThis may be parallelizedSome endpoints may be processed while pollingloop[for each endpoint in FlightInfo.endpoints]PollFlightInfo(FlightDescriptor)1PollInfo{descriptor: FlightDescriptor', ...}2PollFlightInfo(FlightDescriptor')3PollInfo{descriptor: FlightDescriptor'', ...}4PollFlightInfo(FlightDescriptor'')5PollInfo{descriptor: null, info: FlightInfo{endpoints: [FlightEndpoint{ticket: Ticket}, …]}6DoGet(Ticket)7stream of FlightData8ClientMetadata ServerData Server \ No newline at end of file diff --git a/docs/source/format/FlightSql.rst b/docs/source/format/FlightSql.rst index 1a43e4bdff306..b4b85e77a2e5f 100644 --- a/docs/source/format/FlightSql.rst +++ b/docs/source/format/FlightSql.rst @@ -32,9 +32,6 @@ with any database that supports the necessary endpoints. Flight SQL clients wrap the underlying Flight client to provide methods for the new RPC methods described here. -.. warning:: Flight SQL is **experimental** and changes to the - protocol may still be made. - RPC Methods =========== @@ -196,7 +193,7 @@ in the ``app_metadata`` field of the Flight RPC ``PutResult`` returned. 
When used with DoPut: load the stream of Arrow record batches into the specified target table and return the number of rows ingested - via a `DoPutUpdateResult` message. + via a ``DoPutUpdateResult`` message. Flight Server Session Management -------------------------------- @@ -242,21 +239,17 @@ Close and invalidate the current session context. Sequence Diagrams ================= -.. figure:: ./FlightSql/CommandGetTables.mmd.svg - - Listing available tables. - -.. figure:: ./FlightSql/CommandStatementQuery.mmd.svg - - Executing an ad-hoc query. - -.. figure:: ./FlightSql/CommandPreparedStatementQuery.mmd.svg +.. mermaid:: ./FlightSql/CommandGetTables.mmd + :caption: Listing available tables. - Creating a prepared statement, then executing it. +.. mermaid:: ./FlightSql/CommandStatementQuery.mmd + :caption: Executing an ad-hoc query. -.. figure:: ./FlightSql/CommandStatementIngest.mmd.svg +.. mermaid:: ./FlightSql/CommandPreparedStatementQuery.mmd + :caption: Creating a prepared statement, then executing it. - Executing a bulk ingestion. +.. mermaid:: ./FlightSql/CommandStatementIngest.mmd + :caption: Executing a bulk ingestion. External Resources ================== diff --git a/docs/source/format/FlightSql/CommandGetTables.mmd b/docs/source/format/FlightSql/CommandGetTables.mmd index f151411647f23..e6b18ed7dc08b 100644 --- a/docs/source/format/FlightSql/CommandGetTables.mmd +++ b/docs/source/format/FlightSql/CommandGetTables.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandGetTables.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/FlightSql/CommandGetTables.mmd.svg b/docs/source/format/FlightSql/CommandGetTables.mmd.svg deleted file mode 100644 index 4e71c01982289..0000000000000 --- a/docs/source/format/FlightSql/CommandGetTables.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientServerGetFlightInfo(CommandGetTables)1FlightInfo{…Ticket…}2DoGet(Ticket)3stream of FlightData4ClientServer \ No newline at end of file diff --git a/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd b/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd index cbd1eb6014bca..ce18b91eaa33e 100644 --- a/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd +++ b/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. 
-%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandPreparedStatementQuery.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd.svg b/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd.svg deleted file mode 100644 index cbf6a78e9a5ce..0000000000000 --- a/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ServerClientServerClientoptional response with updated handleloop[for each endpoint in FlightInfo.endpoints]loop[for each invocation of the prepared statement]DoAction(ActionCreatePreparedStatementRequest)1ActionCreatePreparedStatementResult{handle}2DoPut(CommandPreparedStatementQuery)3stream of FlightData4DoPutPreparedStatementResult{handle}5GetFlightInfo(CommandPreparedStatementQuery)6FlightInfo{endpoints: [FlightEndpoint{…}, …]}7DoGet(endpoint.ticket)8stream of FlightData9DoAction(ActionClosePreparedStatementRequest)10ActionClosePreparedStatementRequest{}11 \ No newline at end of file diff --git a/docs/source/format/FlightSql/CommandStatementIngest.mmd b/docs/source/format/FlightSql/CommandStatementIngest.mmd index 781289d77b41a..0578f465d4dda 100644 --- a/docs/source/format/FlightSql/CommandStatementIngest.mmd +++ b/docs/source/format/FlightSql/CommandStatementIngest.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandGetTables.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/FlightSql/CommandStatementIngest.mmd.svg b/docs/source/format/FlightSql/CommandStatementIngest.mmd.svg deleted file mode 100644 index e2aa72459afa5..0000000000000 --- a/docs/source/format/FlightSql/CommandStatementIngest.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ServerClientServerClientDoPut(CommandStatementIngest)1stream of FlightData2PutResult{DoPutUpdateResult{RecordCount: int64}}3 \ No newline at end of file diff --git a/docs/source/format/FlightSql/CommandStatementQuery.mmd b/docs/source/format/FlightSql/CommandStatementQuery.mmd index 7b67fecfb75c6..f26aa2f951fcf 100644 --- a/docs/source/format/FlightSql/CommandStatementQuery.mmd +++ b/docs/source/format/FlightSql/CommandStatementQuery.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandStatementQuery.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/FlightSql/CommandStatementQuery.mmd.svg b/docs/source/format/FlightSql/CommandStatementQuery.mmd.svg deleted file mode 100644 index f5e8c79f137ff..0000000000000 --- a/docs/source/format/FlightSql/CommandStatementQuery.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientServerGetFlightInfo(CommandStatementQuery)1FlightInfo{endpoints: [FlightEndpoint{…}, …]}2DoGet(endpoint.ticket)3stream of FlightData4loop[for each endpoint in FlightInfo.endpoints]ClientServer \ No newline at end of file diff --git a/docs/source/format/Glossary.rst b/docs/source/format/Glossary.rst index 3f2f118a95d6d..11c19c5fa70e9 100644 --- a/docs/source/format/Glossary.rst +++ b/docs/source/format/Glossary.rst @@ -211,7 +211,7 @@ Glossary its bindings, and Go). .. 
image:: ../cpp/tables-versus-record-batches.svg
-         :alt: A graphical representation of an Arrow Table and a
+         :alt: A graphical representation of an Arrow Table and a
           Record Batch, with structure as described in text above.
 
 .. seealso:: :term:`chunked array`, :term:`record batch`
diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst
index 1a9b1b97f07ee..436747989acf3 100644
--- a/docs/source/format/Integration.rst
+++ b/docs/source/format/Integration.rst
@@ -501,14 +501,14 @@ integration testing actually tests.
 
 There are two types of integration test cases: the ones populated on the fly
 by the data generator in the Archery utility, and *gold* files that exist
-in the `arrow-testing <https://github.com/apache/arrow-testing>`
+in the `arrow-testing <https://github.com/apache/arrow-testing>`_
 repository.
 
 Data Generator Tests
 ~~~~~~~~~~~~~~~~~~~~
 
 This is the high-level description of the cases which are generated and
-tested using the ``archery integration`` command (see ``get_generated_json_files``
+tested using the ``archery integration`` command (see ``get_generated_json_files``
 in ``datagen.py``):
 
 * Primitive Types
@@ -549,7 +549,7 @@ Gold File Integration Tests
 
 Pre-generated json and arrow IPC files (both file and stream format) exist
 in the `arrow-testing <https://github.com/apache/arrow-testing>`__ repository
 in the ``data/arrow-ipc-stream/integration`` directory. These serve as
-*gold* files that are assumed to be correct for use in testing. They are
+*gold* files that are assumed to be correct for use in testing. They are
 referenced by ``runner.py`` in the code for the :ref:`Archery <archery>`
 utility. Below are the test cases which are covered by them:
@@ -563,7 +563,7 @@ utility. Below are the test cases which are covered by them:
   + intervals
   + maps
   + nested types (list, struct)
-  + primitives
+  + primitives
   + primitive with no batches
   + primitive with zero length batches
 
diff --git a/docs/source/format/Versioning.rst b/docs/source/format/Versioning.rst
index 7ba01107074d0..8fcf11b21f0cc 100644
--- a/docs/source/format/Versioning.rst
+++ b/docs/source/format/Versioning.rst
@@ -51,7 +51,7 @@ data.
 An increase in the **minor** version of the format version, such as
 1.0.0 to 1.1.0, indicates that 1.1.0 contains new features not
 available in 1.0.0. So long as these features are not used (such as a
-new logical data type), forward compatibility is preserved.
+new data type), forward compatibility is preserved.
 
 Long-Term Stability
 ===================
diff --git a/docs/source/format/index.rst b/docs/source/format/index.rst
index 856830d863243..44ea3e8e7e608 100644
--- a/docs/source/format/index.rst
+++ b/docs/source/format/index.rst
@@ -30,6 +30,7 @@ Specifications
    CDataInterface
    CStreamInterface
    CDeviceDataInterface
+   DissociatedIPC
    Flight
    FlightSql
    ADBC
diff --git a/docs/source/java/algorithm.rst b/docs/source/java/algorithm.rst
index 316fd38fa0990..d4838967d614f 100644
--- a/docs/source/java/algorithm.rst
+++ b/docs/source/java/algorithm.rst
@@ -20,12 +20,12 @@ Java Algorithms
 
 Arrow's Java library provides algorithms for some commonly-used
 functionalities. The algorithms are provided in the ``org.apache.arrow.algorithm``
-package of the ``algorithm`` module.
+package of the ``algorithm`` module.
 
 Comparing Vector Elements
 -------------------------
 
-Comparing vector elements is the basic for many algorithms. Vector
+Comparing vector elements is the basis for many algorithms. Vector
 elements can be compared in one of the two ways:
 
 1. **Equality comparison**: there are two possible results for this type of comparisons: ``equal`` and ``unequal``.
@@ -36,30 +36,30 @@ interface.
and ``greater than``. This comparison is supported by the abstract class ``org.apache.arrow.algorithm.sort.VectorValueComparator``. We provide default implementations to compare vector elements. However, users can also define ways -for customized comparisons. +for customized comparisons. Vector Element Search --------------------- -A search algorithm tries to find a particular value in a vector. When successful, a vector index is +A search algorithm tries to find a particular value in a vector. When successful, a vector index is returned; otherwise, a ``-1`` is returned. The following search algorithms are provided: -1. **Linear search**: this algorithm simply traverses the vector from the beginning, until a match is +1. **Linear search**: this algorithm simply traverses the vector from the beginning, until a match is found, or the end of the vector is reached. So it takes ``O(n)`` time, where ``n`` is the number of elements in the vector. This algorithm is implemented in ``org.apache.arrow.algorithm.search.VectorSearcher#linearSearch``. -2. **Binary search**: this represents a more efficient search algorithm, as it runs in ``O(log(n))`` time. +2. **Binary search**: this represents a more efficient search algorithm, as it runs in ``O(log(n))`` time. However, it is only applicable to sorted vectors. To get a sorted vector, one can use one of our sorting algorithms, which will be discussed in the next section. This algorithm is implemented in ``org.apache.arrow.algorithm.search.VectorSearcher#binarySearch``. 3. **Parallel search**: when the vector is large, it takes a long time to traverse the elements to search -for a value. To make this process faster, one can split the vector into multiple partitions, and perform the +for a value. To make this process faster, one can split the vector into multiple partitions, and perform the search for each partition in parallel. This is supported by ``org.apache.arrow.algorithm.search.ParallelSearcher``. -4. **Range search**: for many scenarios, there can be multiple matching values in the vector. +4. **Range search**: for many scenarios, there can be multiple matching values in the vector. If the vector is sorted, the matching values reside in a contiguous region in the vector. The -range search algorithm tries to find the upper/lower bound of the region in ``O(log(n))`` time. +range search algorithm tries to find the upper/lower bound of the region in ``O(log(n))`` time. An implementation is provided in ``org.apache.arrow.algorithm.search.VectorRangeSearcher``. Vector Sorting @@ -72,19 +72,19 @@ classified into the following categories: 1. **In-place sorter**: an in-place sorter performs the sorting by manipulating the original vector, without creating any new vector. So it just returns the original vector after the sorting operations. Currently, we have ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter`` for in-place -sorting in ``O(nlog(n))`` time. As the name suggests, it only supports fixed width vectors. +sorting in ``O(nlog(n))`` time. As the name suggests, it only supports fixed width vectors. 2. **Out-of-place sorter**: an out-of-place sorter does not mutate the original vector. Instead, it copies vector elements to a new vector in sorted order, and returns the new vector. 
-We have ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter.FixedWidthOutOfPlaceVectorSorter`` +We have ``org.apache.arrow.algorithm.sort.FixedWidthOutOfPlaceVectorSorter`` and ``org.apache.arrow.algorithm.sort.VariableWidthOutOfPlaceVectorSorter`` -for fixed width and variable width vectors, respectively. Both algorithms run in ``O(nlog(n))`` time. +for fixed width and variable width vectors, respectively. Both algorithms run in ``O(nlog(n))`` time. 3. **Index sorter**: this sorter does not actually sort the vector. Instead, it returns an integer vector, which corresponds to the indices of vector elements in sorted order. With the index vector, one can -easily construct a sorted vector. In addition, some other tasks can be easily achieved, like finding the ``k``th -smallest value in the vector. Index sorting is supported by ``org.apache.arrow.algorithm.sort.IndexSorter``, -which runs in ``O(nlog(n))`` time. It is applicable to vectors of any type. +easily construct a sorted vector. In addition, some other tasks can be easily achieved, like finding the ``k``\ th +smallest value in the vector. Index sorting is supported by ``org.apache.arrow.algorithm.sort.IndexSorter``, +which runs in ``O(nlog(n))`` time. It is applicable to vectors of any type. Other Algorithms ---------------- diff --git a/docs/source/java/flight.rst b/docs/source/java/flight.rst index e009998be4f4e..6d26583aeefa6 100644 --- a/docs/source/java/flight.rst +++ b/docs/source/java/flight.rst @@ -184,7 +184,7 @@ Handshake-based authentication can be enabled by implementing ``ServerAuthHandler``. Authentication consists of two parts: on initial client connection, the server and client authentication implementations can perform any negotiation needed. The client authentication -handler then provides a token that will be attached to future calls. +handler then provides a token that will be attached to future calls. The client sends data to be validated through ``ClientAuthHandler.authenticate``. The server validates data received through ``ServerAuthHandler.authenticate``. diff --git a/docs/source/java/flight_sql_jdbc_driver.rst b/docs/source/java/flight_sql_jdbc_driver.rst index 0ace2185983a9..f95c2ac755d97 100644 --- a/docs/source/java/flight_sql_jdbc_driver.rst +++ b/docs/source/java/flight_sql_jdbc_driver.rst @@ -162,15 +162,15 @@ the Flight SQL service as gRPC headers. For example, the following URI :: This will connect without authentication or encryption to a Flight SQL service running on ``localhost`` on port 12345. Each request will -also include a `database=mydb` gRPC header. +also include a ``database=mydb`` gRPC header. Connection parameters may also be supplied using the Properties object when using the JDBC Driver Manager to connect. When supplying parameters using the Properties object, values should *not* be URI-encoded. Parameters specified by the URI supersede parameters supplied by the -Properties object. When calling the `user/password overload of -DriverManager#getConnection() +Properties object. When calling the `user/password overload of +DriverManager#getConnection() `_, the username and password supplied on the URI supersede the username and password arguments to the function call.
diff --git a/docs/source/java/install.rst b/docs/source/java/install.rst index a551edc36c477..dc6a55c87fcd6 100644 --- a/docs/source/java/install.rst +++ b/docs/source/java/install.rst @@ -63,7 +63,7 @@ Modifying the command above for Flight: Otherwise, you may see errors like ``java.lang.IllegalAccessError: superclass access check failed: class org.apache.arrow.flight.ArrowMessage$ArrowBufRetainingCompositeByteBuf (in module org.apache.arrow.flight.core) cannot access class io.netty.buffer.CompositeByteBuf (in unnamed module ...) because module -org.apache.arrow.flight.core does not read unnamed module ... +org.apache.arrow.flight.core does not read unnamed module ...`` Finally, if you are using arrow-dataset, you'll also need to ensure that JDK internals are exposed. Modifying the command above for arrow-memory: diff --git a/docs/source/java/ipc.rst b/docs/source/java/ipc.rst index 01341ff2cc391..f5939179177d5 100644 --- a/docs/source/java/ipc.rst +++ b/docs/source/java/ipc.rst @@ -81,7 +81,7 @@ Here we used an in-memory stream, but this could have been a socket or some othe writer.end(); Note that, since the :class:`VectorSchemaRoot` in the writer is a container that can hold batches, batches flow through -:class:`VectorSchemaRoot` as part of a pipeline, so we need to populate data before `writeBatch`, so that later batches +:class:`VectorSchemaRoot` as part of a pipeline, so we need to populate data before ``writeBatch``, so that later batches can overwrite previous ones. Now the :class:`ByteArrayOutputStream` contains the complete stream which contains 5 record batches. diff --git a/docs/source/java/memory.rst b/docs/source/java/memory.rst index 036befa148692..8014a27444ac9 100644 --- a/docs/source/java/memory.rst +++ b/docs/source/java/memory.rst @@ -20,7 +20,7 @@ Memory Management ================= The memory modules contain all the functionality that Arrow uses to allocate and deallocate memory. This document is divided into two parts: -The first part, *Memory Basics*, provides a high-level introduction. The following section, *Arrow Memory In-Depth*, fills in the details. +The first part, *Memory Basics*, provides a high-level introduction. The following section, *Arrow Memory In-Depth*, fills in the details. .. contents:: @@ -39,7 +39,7 @@ Getting Started Arrow's memory management is built around the needs of the columnar format and using off-heap memory. Arrow Java has its own independent implementation. It does not wrap the C++ implementation, although the framework is flexible enough -to be used with memory allocated in C++ that is used by Java code. +to be used with memory allocated in C++ that is used by Java code. Arrow provides multiple modules: the core interfaces, and implementations of the interfaces. Users need the core interfaces, and exactly one of the implementations. @@ -67,9 +67,9 @@ Why Arrow Uses Direct Memory BufferAllocator --------------- -The `BufferAllocator`_ is primarily an arena or nursery used for accounting of buffers (ArrowBuf instances). -As the name suggests, it can allocate new buffers associated with itself, but it can also -handle the accounting for buffers allocated elsewhere. For example, it handles the Java-side accounting for +The `BufferAllocator`_ is primarily an arena or nursery used for accounting of buffers (ArrowBuf instances). +As the name suggests, it can allocate new buffers associated with itself, but it can also +handle the accounting for buffers allocated elsewhere.
For example, it handles the Java-side accounting for memory allocated in C++ and shared with Java using the C-Data Interface. In the code below it performs an allocation: .. code-block:: Java @@ -100,21 +100,21 @@ memory from a child allocator, those allocations are also reflected in all paren effectively sets the program-wide memory limit, and serves as the master bookkeeper for all memory allocations. Child allocators are not strictly required, but can help better organize code. For instance, a lower memory limit can -be set for a particular section of code. The child allocator can be closed when that section completes, -at which point it checks that that section didn't leak any memory. +be set for a particular section of code. The child allocator can be closed when that section completes, +at which point it checks that that section didn't leak any memory. Child allocators can also be named, which makes it easier to tell where an ArrowBuf came from during debugging. Reference counting ------------------ -Because direct memory is expensive to allocate and deallocate, allocators may share direct buffers. To managed shared buffers -deterministically, we use manual reference counting instead of the garbage collector. +Because direct memory is expensive to allocate and deallocate, allocators may share direct buffers. To manage shared buffers +deterministically, we use manual reference counting instead of the garbage collector. This simply means that each buffer has a counter keeping track of the number of references to the buffer, and the user is responsible for properly incrementing/decrementing the counter as the buffer is used. In Arrow, each ArrowBuf has an associated `ReferenceManager`_ that tracks the reference count. You can retrieve -it with ArrowBuf.getReferenceManager(). The reference count is updated using `ReferenceManager.release`_ to decrement the count, -and `ReferenceManager.retain`_ to increment it. +it with ArrowBuf.getReferenceManager(). The reference count is updated using `ReferenceManager.release`_ to decrement the count, +and `ReferenceManager.retain`_ to increment it. Of course, this is tedious and error-prone, so instead of directly working with buffers, we typically use higher-level APIs like ValueVector. Such classes generally implement Closeable/AutoCloseable and will automatically @@ -289,7 +289,7 @@ Finally, enabling the ``TRACE`` logging level will automatically provide this st | at (#8:1) Sometimes, explicitly passing allocators around is difficult. For example, it -can be hard to pass around extra state, like an allocator, through layers of +can be hard to pass around extra state, like an allocator, through layers of existing application or framework code. A global or singleton allocator instance can be useful here, though it should not be your first choice. @@ -370,7 +370,7 @@ Arrow’s memory model is based on the following basic concepts: leaks. - The same physical memory can be shared by multiple allocators and the allocator must provide an accounting paradigm for this purpose. - + Reserving Memory ---------------- @@ -384,17 +384,17 @@ Arrow provides two different ways to reserve memory: - ``AllocationReservation`` via BufferAllocator.newReservation(): Allows a short-term preallocation strategy so that a particular subsystem can ensure future memory is available to support a - particular request.
+ Reference Counting Details -------------------------- -Typically, the ReferenceManager implementation used is an instance of `BufferLedger`_. -A BufferLedger is a ReferenceManager that also maintains the relationship between an ``AllocationManager``, +Typically, the ReferenceManager implementation used is an instance of `BufferLedger`_. +A BufferLedger is a ReferenceManager that also maintains the relationship between an ``AllocationManager``, a ``BufferAllocator`` and one or more individual ``ArrowBuf``\ s -All ArrowBufs (direct or sliced) related to a single BufferLedger/BufferAllocator combination -share the same reference count and either all will be valid or all will be invalid. +All ArrowBufs (direct or sliced) related to a single BufferLedger/BufferAllocator combination +share the same reference count and either all will be valid or all will be invalid. For simplicity of accounting, we treat that memory as being used by one of the BufferAllocators associated with the memory. When that allocator releases its claim on that memory, the memory ownership is then moved to @@ -411,7 +411,7 @@ There are several Allocator types in Arrow Java: - ``ChildAllocator`` - A child allocator that derives from the root allocator Many BufferAllocators can reference the same piece of physical memory at the same -time. It is the AllocationManager’s responsibility to ensure that in this situation, +time. It is the AllocationManager’s responsibility to ensure that in this situation, all memory is accurately accounted for from the Root’s perspective and also to ensure that the memory is correctly released once all BufferAllocators have stopped using that memory. diff --git a/docs/source/java/overview.rst b/docs/source/java/overview.rst index 9d9cbad8a26c1..7780ee32ec9bc 100644 --- a/docs/source/java/overview.rst +++ b/docs/source/java/overview.rst @@ -54,10 +54,10 @@ but some modules are JNI bindings to the C++ library. - (Experimental) A library for converting JDBC data to Arrow data. - Native * - flight-core - - (Experimental) An RPC mechanism for transferring ValueVectors. + - An RPC mechanism for transferring ValueVectors. - Native * - flight-sql - - (Experimental) Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight. + - Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight. - Native * - flight-integration-tests - Integration tests for Flight RPC. diff --git a/docs/source/java/quickstartguide.rst b/docs/source/java/quickstartguide.rst index e358681c57830..1f3ec861d3f46 100644 --- a/docs/source/java/quickstartguide.rst +++ b/docs/source/java/quickstartguide.rst @@ -195,10 +195,10 @@ Example: Create a dataset of names (strings) and ages (32-bit signed integers). .. code-block:: shell VectorSchemaRoot created: - age name - 10 Dave - 20 Peter - 30 Mary + age name + 10 Dave + 20 Peter + 30 Mary Interprocess Communication (IPC) @@ -306,11 +306,11 @@ Example: Read the dataset from the previous example from an Arrow IPC file (rand Record batches in file: 1 VectorSchemaRoot read: - age name - 10 Dave - 20 Peter - 30 Mary + age name + 10 Dave + 20 Peter + 30 Mary More examples available at `Arrow Java Cookbook`_. -.. _`Arrow Java Cookbook`: https://arrow.apache.org/cookbook/java \ No newline at end of file +.. 
_`Arrow Java Cookbook`: https://arrow.apache.org/cookbook/java diff --git a/docs/source/java/substrait.rst b/docs/source/java/substrait.rst index c5857dcc23f75..fa20dbd61dbfb 100644 --- a/docs/source/java/substrait.rst +++ b/docs/source/java/substrait.rst @@ -100,9 +100,9 @@ Here is an example of a Java program that queries a Parquet file using Java Subs .. code-block:: text // Results example: - FieldPath(0) FieldPath(1) FieldPath(2) FieldPath(3) - 0 ALGERIA 0 haggle. carefully final deposits detect slyly agai - 1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon + FieldPath(0) FieldPath(1) FieldPath(2) FieldPath(3) + 0 ALGERIA 0 haggle. carefully final deposits detect slyly agai + 1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon Executing Projections and Filters Using Extended Expressions ============================================================ @@ -189,13 +189,13 @@ This Java program: .. code-block:: text - column-1 column-2 - 13 ROMANIA - ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account - 14 SAUDI ARABIA - ts. silent requests haggle. closely express packages sleep across the blithely - 12 VIETNAM - hely enticingly express accounts. even, final - 13 RUSSIA - requests against the platelets use never according to the quickly regular pint - 13 UNITED KINGDOM - eans boost carefully special requests. accounts are. carefull - 11 UNITED STATES - y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be + column-1 column-2 + 13 ROMANIA - ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account + 14 SAUDI ARABIA - ts. silent requests haggle. closely express packages sleep across the blithely + 12 VIETNAM - hely enticingly express accounts. even, final + 13 RUSSIA - requests against the platelets use never according to the quickly regular pint + 13 UNITED KINGDOM - eans boost carefully special requests. accounts are. carefull + 11 UNITED STATES - y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be .. _`Substrait`: https://substrait.io/ .. _`Substrait Java`: https://github.com/substrait-io/substrait-java diff --git a/docs/source/java/table.rst b/docs/source/java/table.rst index 603910f51694f..5aa95e153cea0 100644 --- a/docs/source/java/table.rst +++ b/docs/source/java/table.rst @@ -75,7 +75,7 @@ Tables are created from a ``VectorSchemaRoot`` as shown below. The memory buffer Table t = new Table(someVectorSchemaRoot); -If you now update the vectors held by the ``VectorSchemaRoot`` (using some version of `ValueVector#setSafe()`), it would reflect those changes, but the values in table *t* are unchanged. +If you now update the vectors held by the ``VectorSchemaRoot`` (using some version of ``ValueVector#setSafe()``), it would reflect those changes, but the values in table *t* are unchanged. Creating a Table from FieldVectors ********************************** @@ -243,7 +243,7 @@ It is important to recognize that rows are NOT reified as objects, but rather op Getting a row ************* -Calling `immutableRow()` on any table instance returns a new ``Row`` instance. +Calling ``immutableRow()`` on any table instance returns a new ``Row`` instance. .. 
code-block:: Java @@ -262,7 +262,7 @@ Since rows are iterable, you can traverse a table using a standard while loop: // do something useful here } -``Table`` implements `Iterable` so you can access rows directly from a table in an enhanced *for* loop: +``Table`` implements ``Iterable`` so you can access rows directly from a table in an enhanced *for* loop: .. code-block:: Java @@ -272,7 +272,7 @@ Since rows are iterable, you can traverse a table using a standard while loop: ... } -Finally, while rows are usually iterated in the order of the underlying data vectors, but they are also positionable using the `Row#setPosition()` method, so you can skip to a specific row. Row numbers are 0-based. +Finally, while rows are usually iterated in the order of the underlying data vectors, they are also positionable using the ``Row#setPosition()`` method, so you can skip to a specific row. Row numbers are 0-based. .. code-block:: Java @@ -281,7 +281,7 @@ Finally, while rows are usually iterated in the order of the underlying data vec Any changes to position are applied to all the columns in the table. -Note that you must call `next()`, or `setPosition()` before accessing values via a row. Failure to do so results in a runtime exception. +Note that you must call ``next()`` or ``setPosition()`` before accessing values via a row. Failure to do so results in a runtime exception. Read operations using rows ************************** @@ -304,7 +304,7 @@ You can also get value using a nullable ``ValueHolder``. For example: This can be used to retrieve values without creating a new Object for each. -In addition to getting values, you can check if a value is null using `isNull()`. This is important if the vector contains any nulls, as asking for a value from a vector can cause NullPointerExceptions in some cases. +In addition to getting values, you can check if a value is null using ``isNull()``. This is important if the vector contains any nulls, as asking for a value from a vector can cause NullPointerExceptions in some cases. .. code-block:: Java @@ -352,13 +352,13 @@ Working with the C-Data interface The ability to work with native code is required for many Arrow features. This section describes how tables can be exported for use with native code. -Exporting works by converting the data to a ``VectorSchemaRoot`` instance and using the existing facilities to transfer the data. You could do it yourself, but that isn't ideal because conversion to a vector schema root breaks the immutability guarantees. Using the `exportTable()` methods in the `Data`_ class avoids this concern. +Exporting works by converting the data to a ``VectorSchemaRoot`` instance and using the existing facilities to transfer the data. You could do it yourself, but that isn't ideal because conversion to a vector schema root breaks the immutability guarantees. Using the ``exportTable()`` methods in the `Data`_ class avoids this concern. .. code-block:: Java Data.exportTable(bufferAllocator, table, dictionaryProvider, outArrowArray); -If the table contains dictionary-encoded vectors and was constructed with a ``DictionaryProvider``, the provider argument to `exportTable()` can be omitted and the table's provider attribute will be used: +If the table contains dictionary-encoded vectors and was constructed with a ``DictionaryProvider``, the provider argument to ``exportTable()`` can be omitted and the table's provider attribute will be used: ..
code-block:: Java diff --git a/docs/source/java/vector.rst b/docs/source/java/vector.rst index abbbd1a236d6d..1c3e123cf50fb 100644 --- a/docs/source/java/vector.rst +++ b/docs/source/java/vector.rst @@ -226,7 +226,7 @@ A :class:`ListVector` is a vector that holds a list of values for each index. Wo For example, the code below shows how to build a :class:`ListVector` of int's using the writer :class:`UnionListWriter`. We build a vector from 0 to 9 and each index contains a list with values [[0, 0, 0, 0, 0], [0, 1, 2, 3, 4], [0, 2, 4, 6, 8], …, [0, 9, 18, 27, 36]]. List values can be added in any order so writing a list such as [3, 1, 2] would be just as valid. .. code-block:: Java - + try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); ListVector listVector = ListVector.empty("vector", allocator)) { UnionListWriter writer = listVector.getWriter(); @@ -240,7 +240,7 @@ For example, the code below shows how to build a :class:`ListVector` of int's us writer.endList(); } listVector.setValueCount(10); - } + } :class:`ListVector` values can be accessed either through the get API or through the reader class :class:`UnionListReader`. To read all the values, first enumerate through the indexes, and then enumerate through the inner list values. diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst index e6f6c3dbbd3d1..aefed00b3d2e0 100644 --- a/docs/source/python/api/arrays.rst +++ b/docs/source/python/api/arrays.rst @@ -63,8 +63,8 @@ may expose data type-specific methods or properties. FixedSizeBinaryArray LargeBinaryArray LargeStringArray - BinaryViewArray, - StringViewArray, + BinaryViewArray + StringViewArray Time32Array Time64Array Date32Array diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 928c607d139ce..5423eebfbab40 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -52,10 +52,10 @@ Aggregations Cumulative Functions -------------------- -Cumulative functions are vector functions that perform a running accumulation on -their input using a given binary associative operation with an identity element -(a monoid) and output an array containing the corresponding intermediate running -values. The input is expected to be of numeric type. By default these functions +Cumulative functions are vector functions that perform a running accumulation on +their input using a given binary associative operation with an identity element +(a monoid) and output an array containing the corresponding intermediate running +values. The input is expected to be of numeric type. By default these functions do not detect overflow. They are also available in an overflow-checking variant, suffixed ``_checked``, which throws an ``ArrowInvalid`` exception when overflow is detected. @@ -173,7 +173,7 @@ variants which detect domain errors where appropriate. Comparisons ----------- -These functions expect two inputs of the same type. If one of the inputs is `null` +These functions expect two inputs of the same type. If one of the inputs is ``null`` they return ``null``. .. 
autosummary:: @@ -540,7 +540,6 @@ Compute Options AssumeTimezoneOptions CastOptions CountOptions - CountOptions CumulativeSumOptions DayOfWeekOptions DictionaryEncodeOptions @@ -566,7 +565,6 @@ Compute Options RoundToMultipleOptions RunEndEncodeOptions ScalarAggregateOptions - ScalarAggregateOptions SelectKOptions SetLookupOptions SliceOptions @@ -578,7 +576,6 @@ Compute Options StructFieldOptions TakeOptions TDigestOptions - TDigestOptions TrimOptions VarianceOptions WeekOptions diff --git a/docs/source/python/api/substrait.rst b/docs/source/python/api/substrait.rst index 66e88fcd279ae..1556be9dbd011 100644 --- a/docs/source/python/api/substrait.rst +++ b/docs/source/python/api/substrait.rst @@ -50,4 +50,4 @@ Utility .. autosummary:: :toctree: ../generated/ - get_supported_functions \ No newline at end of file + get_supported_functions diff --git a/docs/source/python/compute.rst b/docs/source/python/compute.rst index c02059a4f8faa..c2b46c8f3f673 100644 --- a/docs/source/python/compute.rst +++ b/docs/source/python/compute.rst @@ -23,7 +23,7 @@ Compute Functions ================= Arrow supports logical compute operations over inputs of possibly -varying types. +varying types. The standard compute operations are provided by the :mod:`pyarrow.compute` module and can be used directly:: @@ -91,7 +91,7 @@ Grouped Aggregations ==================== PyArrow supports grouped aggregations over :class:`pyarrow.Table` through the -:meth:`pyarrow.Table.group_by` method. +:meth:`pyarrow.Table.group_by` method. The method will return a grouping declaration to which the hash aggregation functions can be applied:: @@ -275,7 +275,7 @@ take two datasets and join them: ds1 = ds.dataset(table1) ds2 = ds.dataset(table2) - joined_ds = ds1.join(ds2, key="id") + joined_ds = ds1.join(ds2, keys="id") The resulting dataset will be an :class:`.InMemoryDataset` containing the joined data:: @@ -300,7 +300,7 @@ Filtering by Expressions :class:`.Table` and :class:`.Dataset` can both be filtered using a boolean :class:`.Expression`. -The expression can be built starting from a +The expression can be built starting from a :func:`pyarrow.compute.field`. Comparisons and transformations can then be applied to one or more fields to build the filter expression you care about. @@ -325,7 +325,7 @@ in column ``"nums"`` by the ``bit_wise_and`` operation equals ``0``. Only the numbers where the last bit was ``0`` will return a ``0`` as the result of ``num & 1`` and as all numbers where the last bit is ``0`` are multiples of ``2`` we will be filtering for the even numbers only. - + Once we have our filter, we can provide it to the :meth:`.Table.filter` method to filter our table only for the matching rows: @@ -392,7 +392,7 @@ User-Defined Functions PyArrow allows defining and registering custom compute functions. These functions can then be called from Python as well as C++ (and potentially any other implementation wrapping Arrow C++, such as the R ``arrow`` package) -using their registered function name. +using their registered function name. UDF support is limited to scalar functions. A scalar function is a function which executes elementwise operations on arrays or scalars. In general, the output of a @@ -441,7 +441,7 @@ output type need to be defined. 
Using :func:`pyarrow.compute.register_scalar_fun function_docs, input_types, output_type) - + The implementation of a user-defined function always takes a first *context* parameter (named ``ctx`` in the example above) which is an instance of @@ -497,9 +497,9 @@ the GCD of one column with the scalar value 30. We will be re-using the category: [["A","B","C","D"]] Note that ``ds.field('')._call(...)`` returns a :func:`pyarrow.compute.Expression`. -The arguments passed to this function call are expressions, not scalar values +The arguments passed to this function call are expressions, not scalar values (notice the difference between :func:`pyarrow.scalar` and :func:`pyarrow.compute.scalar`, -the latter produces an expression). +the latter produces an expression). This expression is evaluated when the projection operator executes it. Projection Expressions diff --git a/docs/source/python/data.rst b/docs/source/python/data.rst index 9156157fcd0c2..4a0f2af6d4868 100644 --- a/docs/source/python/data.rst +++ b/docs/source/python/data.rst @@ -26,8 +26,8 @@ with memory buffers, like the ones explained in the documentation on :ref:`Memory and IO `. These data structures are exposed in Python through a series of interrelated classes: -* **Type Metadata**: Instances of ``pyarrow.DataType``, which describe a logical - array type +* **Type Metadata**: Instances of ``pyarrow.DataType``, which describe the + type of an array and govern how its values are interpreted * **Schemas**: Instances of ``pyarrow.Schema``, which describe a named collection of types. These can be thought of as the column types in a table-like object. @@ -55,8 +55,8 @@ array data. These include: * **Nested types**: list, map, struct, and union * **Dictionary type**: An encoded categorical type (more on this later) -Each logical data type in Arrow has a corresponding factory function for -creating an instance of that type object in Python: +Each data type in Arrow has a corresponding factory function for creating +an instance of that type object in Python: .. ipython:: python @@ -72,11 +72,11 @@ creating an instance of that type object in Python: print(t4) print(t5) -We use the name **logical type** because the **physical** storage may be the -same for one or more types. For example, ``int64``, ``float64``, and -``timestamp[ms]`` all occupy 64 bits per value. +.. note:: + Different data types might share the same physical storage. For example, + ``int64``, ``float64``, and ``timestamp[ms]`` all occupy 64 bits per value. -These objects are `metadata`; they are used for describing the data in arrays, +These objects are ``metadata``; they are used for describing the data in arrays, schemas, and record batches. In Python, they can be used in functions where the input data (e.g. Python objects) may be coerced to more than one Arrow type. @@ -99,7 +99,7 @@ types' children. For example, we can define a list of int32 values with: t6 = pa.list_(t1) t6 -A `struct` is a collection of named fields: +A ``struct`` is a collection of named fields: .. ipython:: python @@ -561,7 +561,7 @@ schema without having to get any of the batches.:: It can also be sent between languages using the :ref:`C stream interface `.
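As a minimal sketch of the reader pattern described above (the schema, field name, and values here are illustrative placeholders, not taken from the Arrow docs), a ``RecordBatchReader`` can be built in Python and its schema inspected before any batch is consumed::

    import pyarrow as pa

    schema = pa.schema([("x", pa.int64())])
    batches = [pa.record_batch([pa.array([1, 2, 3])], schema=schema)]

    # Wrap the batches in a stream-like reader; a consumer can inspect
    # reader.schema without pulling any record batches through the stream.
    reader = pa.RecordBatchReader.from_batches(schema, batches)
    print(reader.schema)       # x: int64
    table = reader.read_all()  # drains the remaining batches into a Table

The same reader object is what gets handed across the C stream interface, so this schema-first behavior carries over language boundaries.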
-Conversion of RecordBatch do Tensor +Conversion of RecordBatch to Tensor ----------------------------------- Each array of the ``RecordBatch`` has its own contiguous memory that is not necessarily diff --git a/docs/source/python/dataset.rst b/docs/source/python/dataset.rst index daab36f9a7be9..00469fd57becf 100644 --- a/docs/source/python/dataset.rst +++ b/docs/source/python/dataset.rst @@ -575,28 +575,28 @@ Partitioning performance considerations Partitioning datasets has two aspects that affect performance: it increases the number of files and it creates a directory structure around the files. Both of these have benefits -as well as costs. Depending on the configuration and the size of your dataset, the costs -can outweigh the benefits. +as well as costs. Depending on the configuration and the size of your dataset, the costs +can outweigh the benefits. -Because partitions split up the dataset into multiple files, partitioned datasets can be -read and written with parallelism. However, each additional file adds a little overhead in -processing for filesystem interaction. It also increases the overall dataset size since +Because partitions split up the dataset into multiple files, partitioned datasets can be +read and written with parallelism. However, each additional file adds a little overhead in +processing for filesystem interaction. It also increases the overall dataset size since each file has some shared metadata. For example, each parquet file contains the schema and -group-level statistics. The number of partitions is a floor for the number of files. If -you partition a dataset by date with a year of data, you will have at least 365 files. If -you further partition by another dimension with 1,000 unique values, you will have up to +group-level statistics. The number of partitions is a floor for the number of files. If +you partition a dataset by date with a year of data, you will have at least 365 files. If +you further partition by another dimension with 1,000 unique values, you will have up to 365,000 files. Partitioning this finely often leads to small files that mostly consist of metadata. -Partitioned datasets create nested folder structures, and those allow us to prune which +Partitioned datasets create nested folder structures, and those allow us to prune which files are loaded in a scan. However, this adds overhead to discovering files in the dataset, as we'll need to recursively "list directory" to find the data files. Partitions that are too fine can cause problems here: Partitioning a dataset by date for a year's worth -of data will require 365 list calls to find all the files; adding another column with +of data will require 365 list calls to find all the files; adding another column with cardinality 1,000 will make that 365,365 calls. The optimal partitioning layout will depend on your data, access patterns, and which -systems will be reading the data. Most systems, including Arrow, should work across a +systems will be reading the data. Most systems, including Arrow, should work across a range of file sizes and partitioning layouts, but there are extremes you should avoid. These guidelines can help avoid some known worst cases: @@ -611,35 +611,35 @@ of file size.
Arrow's file writer provides sensible defaults for group sizing in Configuring files open during a write ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When writing data to the disk, there are a few parameters that can be +When writing data to the disk, there are a few parameters that can be important to optimize the writes, such as the number of rows per file and the maximum number of open files allowed during the write. Set the maximum number of files opened with the ``max_open_files`` parameter of :meth:`write_dataset`. -If ``max_open_files`` is set greater than 0 then this will limit the maximum +If ``max_open_files`` is set greater than 0 then this will limit the maximum number of files that can be left open. This only applies to writing partitioned datasets, where rows are dispatched to the appropriate file depending on their partition values. If an attempt is made to open too many files then the least recently used file will be closed. If this setting is set too low you may end up fragmenting your data into many small files. -If your process is concurrently using other file handlers, either with a -dataset scanner or otherwise, you may hit a system file handler limit. For +If your process is concurrently using other file handles, either with a +dataset scanner or otherwise, you may hit a system file handle limit. For example, if you are scanning a dataset with 300 files and writing out to 900 files, the total of 1200 files may be over a system limit. (On Linux, this might be a "Too Many Open Files" error.) You can either reduce this ``max_open_files`` setting or increase the file handle limit on your system. The default value is 900 which allows some number of files -to be open by the scanner before hitting the default Linux limit of 1024. +to be open by the scanner before hitting the default Linux limit of 1024. -Another important configuration used in :meth:`write_dataset` is ``max_rows_per_file``. +Another important configuration used in :meth:`write_dataset` is ``max_rows_per_file``. Set the maximum number of rows written in each file with the ``max_rows_per_file`` parameter of :meth:`write_dataset`. -If ``max_rows_per_file`` is set greater than 0 then this will limit how many +If ``max_rows_per_file`` is set greater than 0 then this will limit how many rows are placed in any single file. Otherwise there will be no limit and one file will be created in each output directory unless files need to be closed to respect ``max_open_files``. This setting is the primary way to control file size. @@ -653,22 +653,22 @@ Configuring rows per group during a write The volume of data written to the disk for each group can be configured. This configuration includes a lower and an upper bound. -The minimum number of rows required to form a row group is +The minimum number of rows required to form a row group is defined with the ``min_rows_per_group`` parameter of :meth:`write_dataset`. .. note:: - If ``min_rows_per_group`` is set greater than 0 then this will cause the - dataset writer to batch incoming data and only write the row groups to the - disk when sufficient rows have accumulated. The final row group size may be - less than this value if other options such as ``max_open_files`` or + If ``min_rows_per_group`` is set greater than 0 then this will cause the + dataset writer to batch incoming data and only write the row groups to the + disk when sufficient rows have accumulated.
The final row group size may be + less than this value if other options such as ``max_open_files`` or ``max_rows_per_file`` force smaller row group sizes. The maximum number of rows allowed per group is defined with the ``max_rows_per_group`` parameter of :meth:`write_dataset`. -If ``max_rows_per_group`` is set greater than 0 then the dataset writer may split -up large incoming batches into multiple row groups. If this value is set then -``min_rows_per_group`` should also be set or else you may end up with very small +If ``max_rows_per_group`` is set greater than 0 then the dataset writer may split +up large incoming batches into multiple row groups. If this value is set then +``min_rows_per_group`` should also be set or else you may end up with very small row groups (e.g. if the incoming row group size is just barely larger than this value). Row groups are built into the Parquet and IPC/Feather formats but don't affect JSON or CSV. @@ -719,7 +719,7 @@ Customizing & inspecting written files By default the dataset API will create files named "part-i.format" where "i" is a integer generated during the write and "format" is the file format specified in the write_dataset call. For simple datasets it may be possible to know which files will be created but for -larger or partitioned datasets it is not so easy. The ``file_visitor`` keyword can be used +larger or partitioned datasets it is not so easy. The ``file_visitor`` keyword can be used to supply a visitor that will be called as each file is created: .. ipython:: python diff --git a/docs/source/python/dlpack.rst b/docs/source/python/dlpack.rst index f612ebabde5c9..024c2800e1107 100644 --- a/docs/source/python/dlpack.rst +++ b/docs/source/python/dlpack.rst @@ -90,4 +90,4 @@ Convert a PyArrow CPU array to PyTorch tensor: >>> import torch >>> torch.from_dlpack(array) - tensor([2, 0, 2, 4]) + tensor([2, 0, 2, 4]) diff --git a/docs/source/python/extending_types.rst b/docs/source/python/extending_types.rst index 8df0ef0b1fe99..d746505348157 100644 --- a/docs/source/python/extending_types.rst +++ b/docs/source/python/extending_types.rst @@ -101,7 +101,7 @@ define the ``__arrow_array__`` method to return an Arrow array:: import pyarrow return pyarrow.array(..., type=type) -The ``__arrow_array__`` method takes an optional `type` keyword which is passed +The ``__arrow_array__`` method takes an optional ``type`` keyword which is passed through from :func:`pyarrow.array`. The method is allowed to return either a :class:`~pyarrow.Array` or a :class:`~pyarrow.ChunkedArray`. @@ -118,7 +118,7 @@ Defining extension types ("user-defined types") Arrow has the notion of extension types in the metadata specification as a possibility to extend the built-in types. This is done by annotating any of the -built-in Arrow logical types (the "storage type") with a custom type name and +built-in Arrow data types (the "storage type") with a custom type name and optional serialized representation ("ARROW:extension:name" and "ARROW:extension:metadata" keys in the Field’s custom_metadata of an IPC message). diff --git a/docs/source/python/filesystems.rst b/docs/source/python/filesystems.rst index 5309250351d8e..23d10aaaad720 100644 --- a/docs/source/python/filesystems.rst +++ b/docs/source/python/filesystems.rst @@ -182,7 +182,7 @@ Example how you can read contents from a S3 bucket:: Note that it is important to configure :class:`S3FileSystem` with the correct -region for the bucket being used. If `region` is not set, the AWS SDK will +region for the bucket being used. 
If ``region`` is not set, the AWS SDK will choose a value, defaulting to 'us-east-1' if the SDK version is <1.8. Otherwise it will try to use a variety of heuristics (environment variables, configuration profile, EC2 metadata server) to resolve the region. @@ -233,7 +233,7 @@ generate a credentials file in the default location:: To connect to a public bucket without using any credentials, you must pass ``anonymous=True`` to :class:`GcsFileSystem`. Otherwise, the filesystem -will report ``Couldn't resolve host name`` since there are different host +will report ``Couldn't resolve host name`` since there are different host names for authenticated and public access. Example showing how you can read contents from a GCS bucket:: @@ -277,7 +277,7 @@ load time, since the library may not be in your LD_LIBRARY_PATH), and relies on some environment variables. * ``HADOOP_HOME``: the root of your installed Hadoop distribution. Often has - `lib/native/libhdfs.so`. + ``lib/native/libhdfs.so``. * ``JAVA_HOME``: the location of your Java SDK installation. @@ -314,7 +314,7 @@ For example:: # using this to read a partitioned dataset import pyarrow.dataset as ds ds.dataset("data/", filesystem=fs) - + Similarly for Azure Blob Storage:: import adlfs diff --git a/docs/source/python/flight.rst b/docs/source/python/flight.rst index f07b9511ccf68..b63d256547de0 100644 --- a/docs/source/python/flight.rst +++ b/docs/source/python/flight.rst @@ -17,6 +17,7 @@ .. currentmodule:: pyarrow.flight .. highlight:: python +.. _flight: ================ Arrow Flight RPC diff --git a/docs/source/python/getstarted.rst b/docs/source/python/getstarted.rst index d38fcadab288f..42e415c40b835 100644 --- a/docs/source/python/getstarted.rst +++ b/docs/source/python/getstarted.rst @@ -37,7 +37,7 @@ in tabular data. Arrow also provides support for various formats to get those tabular data in and out of disk and networks. Most commonly used formats are -Parquet (:ref:`parquet`) and the IPC format (:ref:`ipc`). +Parquet (:ref:`parquet`) and the IPC format (:ref:`ipc`). Creating Arrays and Tables -------------------------- @@ -63,7 +63,7 @@ in tabular data when attached to a column name birthdays_table = pa.table([days, months, years], names=["days", "months", "years"]) - + birthdays_table See :ref:`data` for more details. @@ -75,7 +75,7 @@ Once you have tabular data, Arrow provides out of the box the features to save and restore that data for common formats like Parquet: -.. ipython:: python +.. ipython:: python import pyarrow.parquet as pq @@ -92,14 +92,14 @@ data will be as quick as possible reloaded_birthdays Saving and loading back data in arrow is usually done through -:ref:`Parquet `, :ref:`IPC format ` (:ref:`feather`), +:ref:`Parquet `, :ref:`IPC format ` (:ref:`feather`), :ref:`CSV ` or :ref:`Line-Delimited JSON ` formats. Performing Computations ----------------------- Arrow ships with a bunch of compute functions that can be applied -to its arrays and tables, so through the compute functions +to its arrays and tables, so through the compute functions it's possible to apply transformations to the data .. ipython:: python @@ -122,7 +122,7 @@ smaller chunks import pyarrow.dataset as ds - ds.write_dataset(birthdays_table, "savedir", format="parquet", + ds.write_dataset(birthdays_table, "savedir", format="parquet", partitioning=ds.partitioning( pa.schema([birthdays_table.schema.field("years")]) )) @@ -151,8 +151,8 @@ how to project them, etc., refer to :ref:`dataset` documentation. 
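As a rough sketch building on the example above (the filter value ``2000`` is a placeholder year; ``savedir`` and the ``years`` partition column come from the snippet above), the partitioned dataset can be loaded back so that only the matching partition directories are scanned::

    import pyarrow.dataset as ds

    # Rediscover the dataset written above; naming the partition field
    # lets the directory levels be parsed back into a "years" column.
    dataset = ds.dataset("savedir", format="parquet", partitioning=["years"])

    # Partition pruning: only directories whose years value matches the
    # filter are read at all.
    table = dataset.to_table(filter=ds.field("years") == 2000)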
Continuing from here -------------------- -For digging further into Arrow, you might want to read the -:doc:`PyArrow Documentation <./index>` itself or the +For digging further into Arrow, you might want to read the +:doc:`PyArrow Documentation <./index>` itself or the `Arrow Python Cookbook `_ diff --git a/docs/source/python/getting_involved.rst b/docs/source/python/getting_involved.rst index 7b3bcf2ac527a..9fda3c7c78488 100644 --- a/docs/source/python/getting_involved.rst +++ b/docs/source/python/getting_involved.rst @@ -54,7 +54,7 @@ used as foundations to build easier to use entities. exposed to the user are declared. In some cases, those files might directly import the entities from inner implementation if they want to expose it as is without modification. -* The ``lib.pyx`` file is where the majority of the core C++ libarrow +* The ``lib.pyx`` file is where the majority of the core C++ libarrow capabilities are exposed to Python. Most of the implementation of this module relies on included ``*.pxi`` files where the specific pieces are built. While being exposed to Python as ``pyarrow.lib`` its content @@ -73,4 +73,4 @@ used as foundations to build easier to use entities. PyArrow is also based on PyArrow C++, dedicated pieces of code that live in ``python/pyarrow/src/arrow/python`` directory and provide the low level code for capabilities like converting to and from numpy or pandas and the classes - that allow to use Python objects and callbacks in C++. \ No newline at end of file + that allow to use Python objects and callbacks in C++. diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst index 4b966e6d2653d..84d6253691f09 100644 --- a/docs/source/python/install.rst +++ b/docs/source/python/install.rst @@ -39,6 +39,13 @@ Install the latest version of PyArrow from conda install -c conda-forge pyarrow +.. note:: + + While the ``pyarrow`` `conda-forge `_ package is + the right choice for most users, both a minimal and maximal variant of the + package exist, either of which may be better for your use case. See + :ref:`python-conda-differences`. + Using Pip --------- @@ -83,7 +90,7 @@ While Arrow uses the OS-provided timezone database on Linux and macOS, it requir user-provided database on Windows. To download and extract the text version of the IANA timezone database follow the instructions in the C++ :ref:`download-timezone-database` or use the pyarrow utility function -`pyarrow.util.download_tzdata_on_windows()` that does the same. +``pyarrow.util.download_tzdata_on_windows()`` that does the same. By default, the timezone database will be detected at ``%USERPROFILE%\Downloads\tzdata``. If the database has been downloaded in a different location, you will need to set @@ -93,3 +100,85 @@ a custom path to the database from Python: >>> import pyarrow as pa >>> pa.set_timezone_db_path("custom_path") + + +.. _python-conda-differences: + +Differences between conda-forge packages +---------------------------------------- + +On `conda-forge `_, PyArrow is published as three +separate packages, each providing varying levels of functionality. This is in +contrast to PyPI, where only a single PyArrow package is provided. + +The purpose of this split is to minimize the size of the installed package for +most users (``pyarrow``), provide a smaller, minimal package for specialized use +cases (``pyarrow-core``), while still providing a complete package for users who +require it (``pyarrow-all``).
What was historically ``pyarrow`` on +`conda-forge `_ is now ``pyarrow-all``, though most +users can continue using ``pyarrow``. + +The ``pyarrow-core`` package includes the following functionality: + +- :ref:`data` +- :ref:`compute` (i.e., ``pyarrow.compute``) +- :ref:`io` +- :ref:`ipc` (i.e., ``pyarrow.ipc``) +- :ref:`filesystem` (i.e., ``pyarrow.fs``. Note: It's planned to move cloud filesystems (i.e., :ref:`S3`, :ref:`GCS`, etc.) into ``pyarrow`` in a future release, though :ref:`filesystem-localfs` will remain in ``pyarrow-core``.) +- File formats: :ref:`Arrow/Feather`, :ref:`JSON`, :ref:`CSV`, :ref:`ORC` (but not Parquet) + +The ``pyarrow`` package adds the following: + +- Acero (i.e., ``pyarrow.acero``) +- :ref:`dataset` (i.e., ``pyarrow.dataset``) +- :ref:`Parquet` (i.e., ``pyarrow.parquet``) +- Substrait (i.e., ``pyarrow.substrait``) + +Finally, ``pyarrow-all`` adds: + +- :ref:`flight` and Flight SQL (i.e., ``pyarrow.flight``) +- Gandiva (i.e., ``pyarrow.gandiva``) + +The following table lists the functionality provided by each package and may be +useful when deciding to use one package over another or when +:ref:`python-conda-custom-selection`. + ++------------+---------------------+--------------+---------+-------------+ +| Component | Package | pyarrow-core | pyarrow | pyarrow-all | ++------------+---------------------+--------------+---------+-------------+ +| Core | pyarrow-core | ✓ | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Parquet | libparquet | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Dataset | libarrow-dataset | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Acero | libarrow-acero | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Substrait | libarrow-substrait | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Flight | libarrow-flight | | | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Flight SQL | libarrow-flight-sql | | | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Gandiva | libarrow-gandiva | | | ✓ | ++------------+---------------------+--------------+---------+-------------+ + +.. _python-conda-custom-selection: + +Creating A Custom Selection +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you know which components you need and want to control what's installed, you +can create a custom selection of packages to include only the extra features you +need. For example, to install ``pyarrow-core`` and add support for reading and +writing Parquet, install ``libparquet`` alongside ``pyarrow-core``: + +.. code-block:: shell + + conda install -c conda-forge pyarrow-core libparquet + +Or if you wish to use ``pyarrow`` but need support for Flight RPC: + +.. code-block:: shell + + conda install -c conda-forge pyarrow libarrow-flight diff --git a/docs/source/python/integration/extending.rst b/docs/source/python/integration/extending.rst index b380fea7e902c..d4d099bcf43c8 100644 --- a/docs/source/python/integration/extending.rst +++ b/docs/source/python/integration/extending.rst @@ -474,7 +474,7 @@ Toolchain Compatibility (Linux) The Python wheels for Linux are built using the `PyPA manylinux images `_ which use -the CentOS `devtoolset-9`. In addition to the other notes +the CentOS ``devtoolset-9``.
In addition to the other notes above, if you are compiling C++ using these shared libraries, you will need to make sure you use a compatible toolchain as well or you might see a segfault during runtime. diff --git a/docs/source/python/integration/python_r.rst b/docs/source/python/integration/python_r.rst index 20627c3782d3c..ec5dfc366fdf9 100644 --- a/docs/source/python/integration/python_r.rst +++ b/docs/source/python/integration/python_r.rst @@ -29,7 +29,7 @@ marshaling and unmarshaling data. The article takes for granted that you have a ``Python`` environment with ``pyarrow`` correctly installed and an ``R`` environment with - ``arrow`` library correctly installed. + the ``arrow`` library correctly installed. See `Python Install Instructions `_ and `R Install instructions `_ for further details. @@ -52,7 +52,7 @@ We could save such a function in a ``addthree.R`` file so that we can make it available for reuse. Once the ``addthree.R`` file is created we can invoke any of its functions -from Python using the +from Python using the `rpy2 `_ library which enables an R runtime within the Python interpreter. @@ -91,12 +91,12 @@ to access the ``R`` function and print the expected result: .. code-block:: bash - $ python addthree.py + $ python addthree.py 6 If instead of passing around basic data types we want to pass around Arrow Arrays, we can do so relying on the -`rpy2-arrow `_ +`rpy2-arrow `_ module which implements ``rpy2`` support for Arrow types. ``rpy2-arrow`` can be installed through ``pip``: @@ -189,7 +189,7 @@ Invoking the ``addthree.R`` script will print the outcome of adding .. code-block:: bash - $ R --silent -f addthree.R + $ R --silent -f addthree.R Array [ @@ -219,7 +219,7 @@ necessary to import an Arrow Array in R from the C Data interface. That work will be done by the ``addthree_cdata`` function which invokes the ``addthree`` function once the Array is imported. -Our ``addthree.R`` will thus have both the ``addthree_cdata`` and the +Our ``addthree.R`` will thus have both the ``addthree_cdata`` and the ``addthree`` functions: .. code-block:: R @@ -261,7 +261,7 @@ Our ``addthree.py`` will thus become: # Import the pyarrow module that provides access to the C Data interface from pyarrow.cffi import ffi as arrow_c - # Allocate structures where we will export the Array data + # Allocate structures where we will export the Array data # and the Array schema. They will be released when we exit the with block. with arrow_c.new("struct ArrowArray*") as c_array, \ arrow_c.new("struct ArrowSchema*") as c_schema: @@ -274,7 +274,7 @@ Our ``addthree.py`` will thus become: array.type._export_to_c(c_schema_ptr) # Invoke the R addthree_cdata function passing the references - # to the array and schema C Data structures. + # to the array and schema C Data structures. # Those references are passed as strings as R doesn't have # native support for 64bit integers, so the integers are # converted to their string representation for R to convert them back. @@ -289,19 +289,19 @@ Our ``addthree.py`` will thus become: # Once the returned array is exported to a C Data infrastructure # we can import it back into pyarrow using Array._import_from_c py_array = pyarrow.Array._import_from_c(c_array_ptr, c_schema_ptr) - + print("RESULT", py_array) Running the newly changed ``addthree.py`` will now print the Array resulting -from adding ``3`` to all the elements of the original +from adding ``3`` to all the elements of the original ``pyarrow.array((1, 2, 3))`` array: ..
code-block:: bash - $ python addthree.py + $ python addthree.py R[write to console]: Attaching package: ‘arrow’ RESULT [ 4, 5, 6 - ] \ No newline at end of file + ] diff --git a/docs/source/python/ipc.rst b/docs/source/python/ipc.rst index 27cd14a68853d..f55e8f8bc5dc3 100644 --- a/docs/source/python/ipc.rst +++ b/docs/source/python/ipc.rst @@ -76,12 +76,12 @@ this one can be created with :func:`~pyarrow.ipc.new_stream`: .. ipython:: python sink = pa.BufferOutputStream() - + with pa.ipc.new_stream(sink, batch.schema) as writer: for i in range(5): writer.write_batch(batch) -Here we used an in-memory Arrow buffer stream (``sink``), +Here we used an in-memory Arrow buffer stream (``sink``), but this could have been a socket or some other IO sink. When creating the ``StreamWriter``, we pass the schema, since the schema @@ -102,7 +102,7 @@ convenience function ``pyarrow.ipc.open_stream``: with pa.ipc.open_stream(buf) as reader: schema = reader.schema batches = [b for b in reader] - + schema len(batches) @@ -126,7 +126,7 @@ The :class:`~pyarrow.RecordBatchFileWriter` has the same API as .. ipython:: python sink = pa.BufferOutputStream() - + with pa.ipc.new_file(sink, batch.schema) as writer: for i in range(10): writer.write_batch(batch) @@ -164,7 +164,7 @@ DataFrame output: with pa.ipc.open_file(buf) as reader: df = reader.read_pandas() - + df[:5] Efficiently Writing and Reading Arrow Data diff --git a/docs/source/python/json.rst b/docs/source/python/json.rst index 99ecbc19a1230..eff6135d895a7 100644 --- a/docs/source/python/json.rst +++ b/docs/source/python/json.rst @@ -21,7 +21,7 @@ Reading JSON files ================== -Arrow supports reading columnar data from line-delimited JSON files. +Arrow supports reading columnar data from line-delimited JSON files. In this context, a JSON file consists of multiple JSON objects, one per line, representing individual data rows. For example, this file represents two rows of data with four columns "a", "b", "c", "d": diff --git a/docs/source/python/memory.rst b/docs/source/python/memory.rst index 23474b923718d..7b49d48ab20fa 100644 --- a/docs/source/python/memory.rst +++ b/docs/source/python/memory.rst @@ -46,7 +46,7 @@ parent-child relationships. There are many implementations of ``arrow::Buffer``, but they all provide a standard interface: a data pointer and length. This is similar to Python's -built-in `buffer protocol` and ``memoryview`` objects. +built-in ``buffer protocol`` and ``memoryview`` objects. A :class:`Buffer` can be created from any Python object implementing the buffer protocol by calling the :func:`py_buffer` function. Let's consider diff --git a/docs/source/python/orc.rst b/docs/source/python/orc.rst index bfa68fc34d895..76c293d742010 100644 --- a/docs/source/python/orc.rst +++ b/docs/source/python/orc.rst @@ -112,7 +112,7 @@ control various settings when writing an ORC file. * ``file_version``, the ORC format version to use. ``'0.11'`` ensures compatibility with older readers, while ``'0.12'`` is the newer one. -* ``stripe_size``, to control the approximate size of data within a column +* ``stripe_size``, to control the approximate size of data within a column stripe. This currently defaults to 64MB. See the :func:`~pyarrow.orc.write_table()` docstring for more details. diff --git a/docs/source/python/parquet.rst b/docs/source/python/parquet.rst index d4717897660b6..029ed4f1a3e15 100644 --- a/docs/source/python/parquet.rst +++ b/docs/source/python/parquet.rst @@ -32,7 +32,7 @@ performance data IO. 
Apache Arrow is an ideal in-memory transport layer for data that is being read or written with Parquet files. We have been concurrently developing the `C++ -implementation of +implementation of Apache Parquet `_, which includes a native, multithreaded C++ adapter to and from in-memory Arrow data. PyArrow includes Python bindings to this code, which thus enables reading diff --git a/docs/source/python/timestamps.rst b/docs/source/python/timestamps.rst index 64a2a354dddef..80a1b7280cbfa 100644 --- a/docs/source/python/timestamps.rst +++ b/docs/source/python/timestamps.rst @@ -24,7 +24,7 @@ Arrow/Pandas Timestamps Arrow timestamps are stored as a 64-bit integer with column metadata to associate a time unit (e.g. milliseconds, microseconds, or nanoseconds), and an -optional time zone. Pandas (`Timestamp`) uses a 64-bit integer representing +optional time zone. Pandas (``Timestamp``) uses a 64-bit integer representing nanoseconds and an optional time zone. Python/Pandas timestamp types without a associated time zone are referred to as "Time Zone Naive". Python/Pandas timestamp types with an associated time zone are @@ -51,8 +51,8 @@ This implies a few things when round-tripping timestamps: #. Timezone information is lost (all timestamps that result from converting from spark to arrow/pandas are "time zone naive"). #. Timestamps are truncated to microseconds. -#. The session time zone might have unintuitive impacts on - translation of timestamp values. +#. The session time zone might have unintuitive impacts on + translation of timestamp values. Spark to Pandas (through Apache Arrow) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -62,8 +62,8 @@ The following cases assume the Spark configuration :: - >>> pdf = pd.DataFrame({'naive': [datetime(2019, 1, 1, 0)], - ... 'aware': [Timestamp(year=2019, month=1, day=1, + >>> pdf = pd.DataFrame({'naive': [datetime(2019, 1, 1, 0)], + ... 'aware': [Timestamp(year=2019, month=1, day=1, ... nanosecond=500, tz=timezone(timedelta(hours=-8)))]}) >>> pdf naive aware @@ -77,7 +77,7 @@ The following cases assume the Spark configuration +-------------------+-------------------+ |2019-01-01 00:00:00|2019-01-01 08:00:00| +-------------------+-------------------+ - + Note that conversion of the aware timestamp is shifted to reflect the time assuming UTC (it represents the same instant in time). For naive timestamps, Spark treats them as being in the system local @@ -129,7 +129,7 @@ session time zone is still PST: |2019-01-01 00:00:00|2019-01-01 00:00:00| +-------------------+-------------------+ - + >>> pst_df.toPandas() naive aware 0 2019-01-01 2019-01-01 @@ -141,7 +141,7 @@ session time zone is still PST: aware 1 non-null datetime64[ns] dtypes: datetime64[ns](2) memory usage: 96.0 bytes - + Notice that, in addition to being a "time zone naive" timestamp, the 'aware' value will now differ when converting to an epoch offset. Spark does the conversion by first converting to the session time zone (or system local time zone if @@ -158,9 +158,9 @@ time: >>> (pst_df.toPandas()['aware'][0].timestamp()-pdf['aware'][0].timestamp())/3600 -8.0 -The same type of conversion happens with the data frame converted while -the session time zone was UTC. In this case both naive and aware -represent different instants in time (the naive instant is due to +The same type of conversion happens with the data frame converted while +the session time zone was UTC. 
+The same type of conversion happens with the data frame converted while
+the session time zone was UTC. In this case both naive and aware
+represent different instants in time (the naive instant is due to
 the change in session time zone between creating data frames):
 
 ::
 
@@ -179,9 +179,9 @@ the change in session time zone between creating data frames):
 
 Note that the surprising shift for aware doesn't happen when the
 session time zone is UTC (but the timestamps
 still become "time zone naive"):
-
+
 ::
-
+
   >>> spark.conf.set("spark.sql.session.timeZone", "UTC")
   >>> pst_df.show()
   +-------------------+-------------------+
@@ -189,7 +189,7 @@ still become "time zone naive"):
 
   +-------------------+-------------------+
   |2019-01-01 08:00:00|2019-01-01 08:00:00|
   +-------------------+-------------------+
-
+
   >>> pst_df.toPandas()['aware'][0]
   Timestamp('2019-01-01 08:00:00')
   >>> pdf['aware'][0]
diff --git a/docs/source/status.rst b/docs/source/status.rst
index f4672d6b4bc55..266381175608a 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -353,7 +353,7 @@ Third-Party Data Formats
 | Format                      | C++     | Java    | Go    | JS | C#    | Rust  | Julia | Swift |
 |                             |         |         |       |    |       |       |       |       |
 +=============================+=========+=========+=======+====+=======+=======+=======+=======+
-| Avro                        |         | R       |       |    |       |       |       |       |
+| Avro                        |         | R       | R     |    |       |       |       |       |
 +-----------------------------+---------+---------+-------+----+-------+-------+-------+-------+
 | CSV                         | R/W     | R (2)   | R/W   |    |       | R/W   | R/W   |       |
 +-----------------------------+---------+---------+-------+----+-------+-------+-------+-------+
diff --git a/format/FlightSql.proto b/format/FlightSql.proto
index bf3fcb6c3d229..6fca141d692a7 100644
--- a/format/FlightSql.proto
+++ b/format/FlightSql.proto
@@ -43,7 +43,6 @@ package arrow.flight.protocol.sql;
  * where there is one row per requested piece of metadata information.
  */
 message CommandGetSqlInfo {
-  option (experimental) = true;
 
   /*
    * Values are modelled after ODBC's SQLGetInfo() function. This information is intended to provide
@@ -1131,7 +1130,6 @@ enum Searchable {
  * The returned data should be ordered by data_type and then by type_name.
  */
 message CommandGetXdbcTypeInfo {
-  option (experimental) = true;
 
   /*
    * Specifies the data type to search for the info.
@@ -1153,7 +1151,6 @@ message CommandGetXdbcTypeInfo {
  * The returned data should be ordered by catalog_name.
  */
 message CommandGetCatalogs {
-  option (experimental) = true;
 }
 
 /*
@@ -1171,7 +1168,6 @@ message CommandGetCatalogs {
  * The returned data should be ordered by catalog_name, then db_schema_name.
  */
 message CommandGetDbSchemas {
-  option (experimental) = true;
 
   /*
    * Specifies the Catalog to search for the tables.
@@ -1219,7 +1215,6 @@ message CommandGetDbSchemas {
  * The returned data should be ordered by catalog_name, db_schema_name, table_name, then table_type, followed by table_schema if requested.
  */
 message CommandGetTables {
-  option (experimental) = true;
 
   /*
    * Specifies the Catalog to search for the tables.
@@ -1272,7 +1267,6 @@ message CommandGetTables {
  * The returned data should be ordered by table_type.
  */
 message CommandGetTableTypes {
-  option (experimental) = true;
 }
 
 /*
@@ -1293,7 +1287,6 @@ message CommandGetTableTypes {
  * The returned data should be ordered by catalog_name, db_schema_name, table_name, key_name, then key_sequence.
  */
 message CommandGetPrimaryKeys {
-  option (experimental) = true;
 
   /*
    * Specifies the catalog to search for the table.
@@ -1348,7 +1341,6 @@ enum UpdateDeleteRules {
 * update_rule and delete_rule return a byte that is equivalent to actions declared on UpdateDeleteRules enum.
*/ message CommandGetExportedKeys { - option (experimental) = true; /* * Specifies the catalog to search for the foreign key table. @@ -1399,7 +1391,6 @@ message CommandGetExportedKeys { * - 4 = SET DEFAULT */ message CommandGetImportedKeys { - option (experimental) = true; /* * Specifies the catalog to search for the primary key table. @@ -1452,7 +1443,6 @@ message CommandGetImportedKeys { * - 4 = SET DEFAULT */ message CommandGetCrossReference { - option (experimental) = true; /** * The catalog name where the parent table is. @@ -1499,7 +1489,6 @@ message CommandGetCrossReference { * Request message for the "CreatePreparedStatement" action on a Flight SQL enabled backend. */ message ActionCreatePreparedStatementRequest { - option (experimental) = true; // The valid SQL string to create a prepared statement for. string query = 1; @@ -1512,7 +1501,6 @@ message ActionCreatePreparedStatementRequest { * An embedded message describing a Substrait plan to execute. */ message SubstraitPlan { - option (experimental) = true; // The serialized substrait.Plan to create a prepared statement for. // XXX(ARROW-16902): this is bytes instead of an embedded message @@ -1529,7 +1517,6 @@ message SubstraitPlan { * Request message for the "CreatePreparedSubstraitPlan" action on a Flight SQL enabled backend. */ message ActionCreatePreparedSubstraitPlanRequest { - option (experimental) = true; // The serialized substrait.Plan to create a prepared statement for. SubstraitPlan plan = 1; @@ -1548,7 +1535,6 @@ message ActionCreatePreparedSubstraitPlanRequest { * The result should be wrapped in a google.protobuf.Any message. */ message ActionCreatePreparedStatementResult { - option (experimental) = true; // Opaque handle for the prepared statement on the server. bytes prepared_statement_handle = 1; @@ -1570,7 +1556,6 @@ message ActionCreatePreparedStatementResult { * Closes server resources associated with the prepared statement handle. */ message ActionClosePreparedStatementRequest { - option (experimental) = true; // Opaque handle for the prepared statement on the server. bytes prepared_statement_handle = 1; @@ -1581,7 +1566,6 @@ message ActionClosePreparedStatementRequest { * Begins a transaction. */ message ActionBeginTransactionRequest { - option (experimental) = true; } /* @@ -1592,7 +1576,6 @@ message ActionBeginTransactionRequest { * FLIGHT_SQL_TRANSACTION_SUPPORT_SAVEPOINT. */ message ActionBeginSavepointRequest { - option (experimental) = true; // The transaction to which a savepoint belongs. bytes transaction_id = 1; @@ -1610,7 +1593,6 @@ message ActionBeginSavepointRequest { * The result should be wrapped in a google.protobuf.Any message. */ message ActionBeginTransactionResult { - option (experimental) = true; // Opaque handle for the transaction on the server. bytes transaction_id = 1; @@ -1626,7 +1608,6 @@ message ActionBeginTransactionResult { * The result should be wrapped in a google.protobuf.Any message. */ message ActionBeginSavepointResult { - option (experimental) = true; // Opaque handle for the savepoint on the server. bytes savepoint_id = 1; @@ -1641,7 +1622,6 @@ message ActionBeginSavepointResult { * invalidated, as are all associated savepoints. */ message ActionEndTransactionRequest { - option (experimental) = true; enum EndTransaction { END_TRANSACTION_UNSPECIFIED = 0; @@ -1667,7 +1647,6 @@ message ActionEndTransactionRequest { * savepoints created after the current savepoint. 
 */
 message ActionEndSavepointRequest {
-  option (experimental) = true;
 
   enum EndSavepoint {
     END_SAVEPOINT_UNSPECIFIED = 0;
@@ -1702,7 +1681,6 @@ message ActionEndSavepointRequest {
  * - GetFlightInfo: execute the query.
  */
 message CommandStatementQuery {
-  option (experimental) = true;
 
   // The SQL syntax.
   string query = 1;
@@ -1729,7 +1707,6 @@ message CommandStatementQuery {
  * - DoPut: execute the query.
  */
 message CommandStatementSubstraitPlan {
-  option (experimental) = true;
 
   // A serialized substrait.Plan
   SubstraitPlan plan = 1;
@@ -1742,7 +1719,6 @@ message CommandStatementSubstraitPlan {
  * This should be used only once and treated as an opaque value, that is, clients should not attempt to parse this.
  */
 message TicketStatementQuery {
-  option (experimental) = true;
 
   // Unique identifier for the instance of the statement to execute.
   bytes statement_handle = 1;
@@ -1770,7 +1746,6 @@ message TicketStatementQuery {
  * - GetFlightInfo: execute the prepared statement instance.
  */
 message CommandPreparedStatementQuery {
-  option (experimental) = true;
 
   // Opaque handle for the prepared statement on the server.
   bytes prepared_statement_handle = 1;
@@ -1781,7 +1756,6 @@ message CommandPreparedStatementQuery {
  * for the RPC call DoPut to cause the server to execute the included SQL update.
  */
 message CommandStatementUpdate {
-  option (experimental) = true;
 
   // The SQL syntax.
   string query = 1;
@@ -1795,7 +1769,6 @@ message CommandStatementUpdate {
  * prepared statement handle as an update.
  */
 message CommandPreparedStatementUpdate {
-  option (experimental) = true;
 
   // Opaque handle for the prepared statement on the server.
   bytes prepared_statement_handle = 1;
@@ -1807,7 +1780,6 @@ message CommandPreparedStatementUpdate {
  * FlightData into the target destination.
  */
 message CommandStatementIngest {
-  option (experimental) = true;
 
   // Options for table definition behavior
   message TableDefinitionOptions {
@@ -1866,7 +1838,6 @@ message CommandStatementIngest {
  * in the request, containing results from the update.
  */
 message DoPutUpdateResult {
-  option (experimental) = true;
 
   // The number of records updated. A return value of -1 represents
   // an unknown updated record count.
@@ -1880,7 +1851,6 @@ message DoPutUpdateResult {
  * can continue as though the fields in this message were not provided or set to sensible default values.
  */
 message DoPutPreparedStatementResult {
-  option (experimental) = true;
 
   // Represents a (potentially updated) opaque handle for the prepared statement on the server.
   // Because the handle could potentially be updated, any previous handles for this prepared
@@ -1912,7 +1882,6 @@ message DoPutPreparedStatementResult {
  */
 message ActionCancelQueryRequest {
   option deprecated = true;
-  option (experimental) = true;
 
   // The result of the GetFlightInfo RPC that initiated the query.
   // XXX(ARROW-16902): this must be a serialized FlightInfo, but is
@@ -1931,7 +1900,6 @@ message ActionCancelQueryRequest {
  */
 message ActionCancelQueryResult {
   option deprecated = true;
-  option (experimental) = true;
 
   enum CancelResult {
     // The cancellation status is unknown. Servers should avoid using
diff --git a/go/README.md b/go/README.md
index 4f97c49e1c7e8..220b0a230a615 100644
--- a/go/README.md
+++ b/go/README.md
@@ -48,7 +48,7 @@ func main() {
 
 DSN option keys are expressed as `k=v`, delimited with `;`. Some option keys
 are defined in ADBC, others are defined in the FlightSQL ADBC driver.
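For illustration, a DSN assembled from such pairs might look like the following sketch (the `uri`, `username`, and `password` keys are shown as typical examples; consult the links below for the authoritative option lists):

```go
// A minimal sketch: compose a FlightSQL DSN from k=v pairs joined by ";".
// The endpoint and credentials here are hypothetical placeholders; assumes
// "strings" is imported.
dsn := strings.Join([]string{
	"uri=grpc+tls://flightsql.example.com:32010",
	"username=alice",
	"password=secret",
}, ";")
_ = dsn // pass this string wherever the driver expects a DSN
```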
-- Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/go/flight_sql.html#client-options) +- Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/flight_sql.html#client-options) - ADBC [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/adbc.go#L149-L158) - FlightSQL driver option keys [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/driver/flightsql/flightsql_adbc.go#L70-L81) diff --git a/go/arrow/array/bufferbuilder_numeric_test.go b/go/arrow/array/bufferbuilder_numeric_test.go index df48dcff2e70f..372ba6976269d 100644 --- a/go/arrow/array/bufferbuilder_numeric_test.go +++ b/go/arrow/array/bufferbuilder_numeric_test.go @@ -20,8 +20,8 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow/memory" "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v17/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go index 1d65657c5fae8..b962cda40b8b3 100644 --- a/go/arrow/array/numeric.gen.go +++ b/go/arrow/array/numeric.gen.go @@ -307,7 +307,7 @@ func (a *Float64) MarshalJSON() ([]byte, error) { default: vals[i] = f } - + } return json.Marshal(vals) diff --git a/go/arrow/array/numeric_test.go b/go/arrow/array/numeric_test.go index f775035c66652..3013d45acbb2b 100644 --- a/go/arrow/array/numeric_test.go +++ b/go/arrow/array/numeric_test.go @@ -16,7 +16,7 @@ package array_test -import ( +import ( "math" "reflect" "testing" @@ -144,7 +144,7 @@ func TestFloat16MarshalJSON(t *testing.T) { bldr := array.NewFloat16Builder(pool) defer bldr.Release() - + jsonstr := `[0, 1, 2, 3, "NaN", "NaN", 4, 5, "+Inf", "-Inf"]` bldr.Append(float16.New(0)) @@ -158,7 +158,6 @@ func TestFloat16MarshalJSON(t *testing.T) { bldr.Append(float16.Inf()) bldr.Append(float16.Inf().Negate()) - expected := bldr.NewFloat16Array() defer expected.Release() expected_json, err := expected.MarshalJSON() @@ -172,7 +171,7 @@ func TestFloat32MarshalJSON(t *testing.T) { bldr := array.NewFloat32Builder(pool) defer bldr.Release() - + jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` bldr.Append(0) @@ -186,10 +185,9 @@ func TestFloat32MarshalJSON(t *testing.T) { bldr.Append(5) bldr.Append(float32(math.Inf(-1))) - expected := bldr.NewFloat32Array() defer expected.Release() - + expected_json, err := expected.MarshalJSON() assert.NoError(t, err) @@ -223,7 +221,7 @@ func TestFloat64MarshalJSON(t *testing.T) { assert.NoError(t, err) assert.JSONEq(t, jsonstr, string(expected_json)) - + } func TestUnmarshalSpecialFloat(t *testing.T) { diff --git a/go/arrow/array/numericbuilder.gen_test.go b/go/arrow/array/numericbuilder.gen_test.go index 43b14c1868666..b43aa7f807090 100644 --- a/go/arrow/array/numericbuilder.gen_test.go +++ b/go/arrow/array/numericbuilder.gen_test.go @@ -648,9 +648,9 @@ func TestFloat64BuilderUnmarshalJSON(t *testing.T) { arr := bldr.NewFloat64Array() defer arr.Release() - + assert.NotNil(t, arr) - + assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) @@ -1276,9 +1276,9 @@ func TestFloat32BuilderUnmarshalJSON(t *testing.T) { arr := bldr.NewFloat32Array() defer arr.Release() - + assert.NotNil(t, arr) - + assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) assert.True(t, math.IsInf(float64(arr.Value(2)), 1), 
arr.Value(2))
 	assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5))
diff --git a/go/arrow/array/record_test.go b/go/arrow/array/record_test.go
index 36bb0eaa4c511..be6a26eb1a6ba 100644
--- a/go/arrow/array/record_test.go
+++ b/go/arrow/array/record_test.go
@@ -94,7 +94,7 @@ func TestRecord(t *testing.T) {
 	if _, err := rec.SetColumn(0, col2_1); err == nil {
 		t.Fatalf("expected an error")
 	}
-	newRec, err := rec.SetColumn(1, col2_1);
+	newRec, err := rec.SetColumn(1, col2_1)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go
index b86898277bf47..00d1f351eaf11 100644
--- a/go/arrow/cdata/cdata.go
+++ b/go/arrow/cdata/cdata.go
@@ -448,6 +448,7 @@ func (imp *cimporter) doImportArr(src *CArrowArray) error {
 	defer func() {
 		if imp.alloc.bufCount == 0 {
 			C.ArrowArrayRelease(imp.arr)
+			C.free(unsafe.Pointer(imp.arr))
 		}
 	}()
diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go
index 4455c8b782167..06fed69a77fe5 100644
--- a/go/arrow/csv/common.go
+++ b/go/arrow/csv/common.go
@@ -239,21 +239,31 @@ func WithStringsReplacer(replacer *strings.Replacer) Option {
 
 func validate(schema *arrow.Schema) {
 	for i, f := range schema.Fields() {
-		switch ft := f.Type.(type) {
-		case *arrow.BooleanType:
-		case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type:
-		case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type:
-		case *arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type:
-		case *arrow.StringType, *arrow.LargeStringType:
-		case *arrow.TimestampType:
-		case *arrow.Date32Type, *arrow.Date64Type:
-		case *arrow.Decimal128Type, *arrow.Decimal256Type:
-		case *arrow.ListType, *arrow.LargeListType, *arrow.FixedSizeListType:
-		case *arrow.BinaryType, *arrow.LargeBinaryType, *arrow.FixedSizeBinaryType:
-		case arrow.ExtensionType:
-		case *arrow.NullType:
-		default:
-			panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft))
+		if !typeSupported(f.Type) {
+			panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, f.Type))
 		}
 	}
 }
+
+func typeSupported(dt arrow.DataType) bool {
+	switch dt := dt.(type) {
+	case *arrow.BooleanType:
+	case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type:
+	case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type:
+	case *arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type:
+	case *arrow.StringType, *arrow.LargeStringType:
+	case *arrow.TimestampType:
+	case *arrow.Date32Type, *arrow.Date64Type:
+	case *arrow.Decimal128Type, *arrow.Decimal256Type:
+	case *arrow.MapType:
+		return false
+	case arrow.ListLikeType:
+		return typeSupported(dt.Elem())
+	case *arrow.BinaryType, *arrow.LargeBinaryType, *arrow.FixedSizeBinaryType:
+	case arrow.ExtensionType:
+	case *arrow.NullType:
+	default:
+		return false
+	}
+	return true
+}
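With validation consolidated behind `arrow.ListLikeType`, any list-like column (list, large list, or fixed-size list) is accepted through a single recursive check. A hedged usage sketch of what this enables for a reader, using the brace-delimited cell syntax handled by the reader hunks that follow (the data and field names are illustrative):

```go
package main

import (
	"fmt"
	"strings"

	"github.com/apache/arrow/go/v17/arrow"
	"github.com/apache/arrow/go/v17/arrow/csv"
)

func main() {
	// One list-typed column; ';' is the field delimiter, matching the
	// testdata files further below, so commas inside "{...}" are safe.
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "vals", Type: arrow.ListOf(arrow.PrimitiveTypes.Int64), Nullable: true},
	}, nil)
	rdr := csv.NewReader(strings.NewReader("{1,2,3}\n{}\n"), schema, csv.WithComma(';'))
	defer rdr.Release()
	for rdr.Next() {
		fmt.Println(rdr.Record()) // prints the decoded list values per row
	}
}
```
diff --git a/go/arrow/csv/reader.go b/go/arrow/csv/reader.go
index 18f1083e6a9dc..46591a9a5adee 100644
--- a/go/arrow/csv/reader.go
+++ b/go/arrow/csv/reader.go
@@ -474,6 +474,10 @@ func (r *Reader) initFieldConverter(bldr array.Builder) func(string) {
 		return func(str string) {
 			r.parseDate32(bldr, str)
 		}
+	case *arrow.Date64Type:
+		return func(str string) {
+			r.parseDate64(bldr, str)
+		}
 	case *arrow.Time32Type:
 		return func(str string) {
 			r.parseTime32(bldr, str, dt.Unit)
@@ -486,17 +490,13 @@ func (r *Reader) initFieldConverter(bldr array.Builder) func(string) {
 		return func(str string) {
 			r.parseDecimal256(bldr, str, dt.Precision, dt.Scale)
 		}
-	case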
*arrow.ListType: - return func(s string) { - r.parseList(bldr, s) - } - case *arrow.LargeListType: + case *arrow.FixedSizeListType: return func(s string) { - r.parseLargeList(bldr, s) + r.parseFixedSizeList(bldr.(*array.FixedSizeListBuilder), s, int(dt.Len())) } - case *arrow.FixedSizeListType: + case arrow.ListLikeType: return func(s string) { - r.parseFixedSizeList(bldr, s, int(dt.Len())) + r.parseListLike(bldr.(array.ListLikeBuilder), s) } case *arrow.BinaryType: return func(s string) { @@ -740,81 +740,67 @@ func (r *Reader) parseDate32(field array.Builder, str string) { field.(*array.Date32Builder).Append(arrow.Date32FromTime(tm)) } -func (r *Reader) parseTime32(field array.Builder, str string, unit arrow.TimeUnit) { +func (r *Reader) parseDate64(field array.Builder, str string) { if r.isNull(str) { field.AppendNull() return } - val, err := arrow.Time32FromString(str, unit) + tm, err := time.Parse("2006-01-02", str) if err != nil && r.err == nil { r.err = err field.AppendNull() return } - field.(*array.Time32Builder).Append(val) + field.(*array.Date64Builder).Append(arrow.Date64FromTime(tm)) } -func (r *Reader) parseDecimal128(field array.Builder, str string, prec, scale int32) { +func (r *Reader) parseTime32(field array.Builder, str string, unit arrow.TimeUnit) { if r.isNull(str) { field.AppendNull() return } - val, err := decimal128.FromString(str, prec, scale) + val, err := arrow.Time32FromString(str, unit) if err != nil && r.err == nil { r.err = err field.AppendNull() return } - field.(*array.Decimal128Builder).Append(val) + field.(*array.Time32Builder).Append(val) } -func (r *Reader) parseDecimal256(field array.Builder, str string, prec, scale int32) { +func (r *Reader) parseDecimal128(field array.Builder, str string, prec, scale int32) { if r.isNull(str) { field.AppendNull() return } - val, err := decimal256.FromString(str, prec, scale) + val, err := decimal128.FromString(str, prec, scale) if err != nil && r.err == nil { r.err = err field.AppendNull() return } - field.(*array.Decimal256Builder).Append(val) + field.(*array.Decimal128Builder).Append(val) } -func (r *Reader) parseList(field array.Builder, str string) { +func (r *Reader) parseDecimal256(field array.Builder, str string, prec, scale int32) { if r.isNull(str) { field.AppendNull() return } - if !(strings.HasPrefix(str, "{") && strings.HasSuffix(str, "}")) { - r.err = errors.New("invalid list format. 
should start with '{' and end with '}'") - return - } - str = strings.Trim(str, "{}") - listBldr := field.(*array.ListBuilder) - listBldr.Append(true) - if len(str) == 0 { - // we don't want to create the csv reader if we already know the - // string is empty - return - } - valueBldr := listBldr.ValueBuilder() - reader := csv.NewReader(strings.NewReader(str)) - items, err := reader.Read() - if err != nil { + + val, err := decimal256.FromString(str, prec, scale) + if err != nil && r.err == nil { r.err = err + field.AppendNull() return } - for _, str := range items { - r.initFieldConverter(valueBldr)(str) - } + field.(*array.Decimal256Builder).Append(val) } -func (r *Reader) parseLargeList(field array.Builder, str string) { +func (r *Reader) parseListLike(field array.ListLikeBuilder, str string) { if r.isNull(str) { field.AppendNull() return @@ -824,14 +810,13 @@ func (r *Reader) parseLargeList(field array.Builder, str string) { return } str = strings.Trim(str, "{}") - largeListBldr := field.(*array.LargeListBuilder) - largeListBldr.Append(true) + field.Append(true) if len(str) == 0 { // we don't want to create the csv reader if we already know the // string is empty return } - valueBldr := largeListBldr.ValueBuilder() + valueBldr := field.ValueBuilder() reader := csv.NewReader(strings.NewReader(str)) items, err := reader.Read() if err != nil { @@ -843,7 +828,7 @@ func (r *Reader) parseLargeList(field array.Builder, str string) { } } -func (r *Reader) parseFixedSizeList(field array.Builder, str string, n int) { +func (r *Reader) parseFixedSizeList(field *array.FixedSizeListBuilder, str string, n int) { if r.isNull(str) { field.AppendNull() return @@ -853,14 +838,13 @@ func (r *Reader) parseFixedSizeList(field array.Builder, str string, n int) { return } str = strings.Trim(str, "{}") - fixedSizeListBldr := field.(*array.FixedSizeListBuilder) - fixedSizeListBldr.Append(true) + field.Append(true) if len(str) == 0 { // we don't want to create the csv reader if we already know the // string is empty return } - valueBldr := fixedSizeListBldr.ValueBuilder() + valueBldr := field.ValueBuilder() reader := csv.NewReader(strings.NewReader(str)) items, err := reader.Read() if err != nil { diff --git a/go/arrow/csv/reader_test.go b/go/arrow/csv/reader_test.go index b6654dd1984ea..65453db015a7e 100644 --- a/go/arrow/csv/reader_test.go +++ b/go/arrow/csv/reader_test.go @@ -357,6 +357,8 @@ func testCSVReader(t *testing.T, filepath string, withHeader bool, stringsCanBeN {Name: "large_binary", Type: arrow.BinaryTypes.LargeBinary}, {Name: "fixed_size_binary", Type: &arrow.FixedSizeBinaryType{ByteWidth: 3}}, {Name: "uuid", Type: types.NewUUIDType()}, + {Name: "date32", Type: arrow.PrimitiveTypes.Date32}, + {Name: "date64", Type: arrow.PrimitiveTypes.Date64}, }, nil, ) @@ -420,6 +422,8 @@ rec[0]["binary"]: ["\x00\x01\x02"] rec[0]["large_binary"]: ["\x00\x01\x02"] rec[0]["fixed_size_binary"]: ["\x00\x01\x02"] rec[0]["uuid"]: ["00000000-0000-0000-0000-000000000001"] +rec[0]["date32"]: [19121] +rec[0]["date64"]: [1652054400000] rec[1]["bool"]: [false] rec[1]["i8"]: [-2] rec[1]["i16"]: [-2] @@ -442,6 +446,8 @@ rec[1]["binary"]: [(null)] rec[1]["large_binary"]: [(null)] rec[1]["fixed_size_binary"]: [(null)] rec[1]["uuid"]: ["00000000-0000-0000-0000-000000000002"] +rec[1]["date32"]: [19121] +rec[1]["date64"]: [1652054400000] rec[2]["bool"]: [(null)] rec[2]["i8"]: [(null)] rec[2]["i16"]: [(null)] @@ -464,6 +470,8 @@ rec[2]["binary"]: [(null)] rec[2]["large_binary"]: [(null)] rec[2]["fixed_size_binary"]: [(null)] 
rec[2]["uuid"]: [(null)] +rec[2]["date32"]: [(null)] +rec[2]["date64"]: [(null)] `, str1Value, str1Value, str2Value, str2Value) got, want := out.String(), want require.Equal(t, want, got) diff --git a/go/arrow/csv/testdata/header.csv b/go/arrow/csv/testdata/header.csv index 50be4f5e4daca..68ae18a499dee 100644 --- a/go/arrow/csv/testdata/header.csv +++ b/go/arrow/csv/testdata/header.csv @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. # -bool;i8;i16;i32;i64;u8;u16;u32;u64;f16;f32;f64;str;large_str;ts;list(i64);large_list(i64);fixed_size_list(i64);binary;large_binary;fixed_size_binary;uuid -true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;1.1;str-1;str-1;2022-05-09T00:01:01;{1,2,3};{1,2,3};{1,2,3};AAEC;AAEC;AAEC;00000000-0000-0000-0000-000000000001 -false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;2.2;;;2022-05-09T23:59:59;{};{};{4,5,6};;;;00000000-0000-0000-0000-000000000002 -null;NULL;null;N/A;;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null \ No newline at end of file +bool;i8;i16;i32;i64;u8;u16;u32;u64;f16;f32;f64;str;large_str;ts;list(i64);large_list(i64);fixed_size_list(i64);binary;large_binary;fixed_size_binary;uuid;date32;date64 +true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;1.1;str-1;str-1;2022-05-09T00:01:01;{1,2,3};{1,2,3};{1,2,3};AAEC;AAEC;AAEC;00000000-0000-0000-0000-000000000001;2022-05-09;2022-05-09 +false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;2.2;;;2022-05-09T23:59:59;{};{};{4,5,6};;;;00000000-0000-0000-0000-000000000002;2022-05-09;2022-05-09 +null;NULL;null;N/A;;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null \ No newline at end of file diff --git a/go/arrow/csv/testdata/types.csv b/go/arrow/csv/testdata/types.csv index d32941f4b214d..91c0cf3b252b3 100644 --- a/go/arrow/csv/testdata/types.csv +++ b/go/arrow/csv/testdata/types.csv @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
# -## supported types: bool;int8;int16;int32;int64;uint8;uint16;uint32;uint64;float16;float32;float64;string;large_string;timestamp;list(i64);large_list(i64);fixed_size_list(i64);binary;large_binary;fixed_size_binary;uuid -true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;1.1;str-1;str-1;2022-05-09T00:01:01;{1,2,3};{1,2,3};{1,2,3};AAEC;AAEC;AAEC;00000000-0000-0000-0000-000000000001 -false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;2.2;;;2022-05-09T23:59:59;{};{};{4,5,6};;;;00000000-0000-0000-0000-000000000002 -null;NULL;null;N/A;;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null \ No newline at end of file +## supported types: bool;int8;int16;int32;int64;uint8;uint16;uint32;uint64;float16;float32;float64;string;large_string;timestamp;list(i64);large_list(i64);fixed_size_list(i64);binary;large_binary;fixed_size_binary;uuid;date32;date64 +true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;1.1;str-1;str-1;2022-05-09T00:01:01;{1,2,3};{1,2,3};{1,2,3};AAEC;AAEC;AAEC;00000000-0000-0000-0000-000000000001;2022-05-09;2022-05-09 +false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;2.2;;;2022-05-09T23:59:59;{};{};{4,5,6};;;;00000000-0000-0000-0000-000000000002;2022-05-09;2022-05-09 +null;NULL;null;N/A;;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null \ No newline at end of file diff --git a/go/arrow/csv/transformer.go b/go/arrow/csv/transformer.go index 90c26ac981078..237437c0441e1 100644 --- a/go/arrow/csv/transformer.go +++ b/go/arrow/csv/transformer.go @@ -29,7 +29,7 @@ import ( "github.com/apache/arrow/go/v17/arrow/array" ) -func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, stringsReplacer func(string)string) []string { +func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, stringsReplacer func(string) string) []string { res := make([]string, col.Len()) switch typ.(type) { case *arrow.BooleanType: @@ -215,62 +215,25 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, st res[i] = w.nullValue } } - case *arrow.ListType: - arr := col.(*array.List) - listVals, offsets := arr.ListValues(), arr.Offsets() - for i := 0; i < arr.Len(); i++ { - if arr.IsValid(i) { - list := array.NewSlice(listVals, int64(offsets[i]), int64(offsets[i+1])) - var b bytes.Buffer - b.Write([]byte{'{'}) - writer := csv.NewWriter(&b) - writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer)) - writer.Flush() - b.Truncate(b.Len() - 1) - b.Write([]byte{'}'}) - res[i] = b.String() - list.Release() - } else { - res[i] = w.nullValue - } - } - case *arrow.LargeListType: - arr := col.(*array.LargeList) - listVals, offsets := arr.ListValues(), arr.Offsets() - for i := 0; i < arr.Len(); i++ { - if arr.IsValid(i) { - list := array.NewSlice(listVals, int64(offsets[i]), int64(offsets[i+1])) - var b bytes.Buffer - b.Write([]byte{'{'}) - writer := csv.NewWriter(&b) - writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer)) - writer.Flush() - b.Truncate(b.Len() - 1) - b.Write([]byte{'}'}) - res[i] = b.String() - list.Release() - } else { - res[i] = w.nullValue - } - } - case *arrow.FixedSizeListType: - arr := col.(*array.FixedSizeList) + case arrow.ListLikeType: + arr := col.(array.ListLike) listVals := arr.ListValues() for i := 0; i < arr.Len(); i++ { - if arr.IsValid(i) { - list := array.NewSlice(listVals, int64((arr.Len()-1)*i), int64((arr.Len()-1)*(i+1))) - var b bytes.Buffer - b.Write([]byte{'{'}) - writer := csv.NewWriter(&b) - writer.Write(w.transformColToStringArr(list.DataType(), list, 
stringsReplacer)) - writer.Flush() - b.Truncate(b.Len() - 1) - b.Write([]byte{'}'}) - res[i] = b.String() - list.Release() - } else { + if arr.IsNull(i) { res[i] = w.nullValue + continue } + start, end := arr.ValueOffsets(i) + list := array.NewSlice(listVals, start, end) + var b bytes.Buffer + b.Write([]byte{'{'}) + writer := csv.NewWriter(&b) + writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer)) + writer.Flush() + b.Truncate(b.Len() - 1) + b.Write([]byte{'}'}) + res[i] = b.String() + list.Release() } case *arrow.BinaryType: arr := col.(*array.Binary) diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go index 1e65ccd4594ca..b38983b7f2e5d 100644 --- a/go/arrow/datatype_nested.go +++ b/go/arrow/datatype_nested.go @@ -877,7 +877,7 @@ func DenseUnionFromArrays(children []Array, fields []string, codes []UnionTypeCo } // DenseUnionOf is equivalent to UnionOf(arrow.DenseMode, fields, typeCodes), -// constructing a SparseUnionType from a list of fields and type codes. +// constructing a DenseUnionType from a list of fields and type codes. // // If len(fields) != len(typeCodes) this will panic. They are allowed to be // of length 0. diff --git a/go/arrow/datatype_nested_test.go b/go/arrow/datatype_nested_test.go index a1daa8e58df31..fc4c672c6b768 100644 --- a/go/arrow/datatype_nested_test.go +++ b/go/arrow/datatype_nested_test.go @@ -94,14 +94,14 @@ func TestStructOf(t *testing.T) { fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32}}, want: &StructType{ fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32}}, - index: map[string][]int{"f1": []int{0}}, + index: map[string][]int{"f1": {0}}, }, }, { fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32, Nullable: true}}, want: &StructType{ fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32, Nullable: true}}, - index: map[string][]int{"f1": []int{0}}, + index: map[string][]int{"f1": {0}}, }, }, { @@ -114,7 +114,7 @@ func TestStructOf(t *testing.T) { {Name: "f1", Type: PrimitiveTypes.Int32}, {Name: "", Type: PrimitiveTypes.Int64}, }, - index: map[string][]int{"f1": []int{0}, "": []int{1}}, + index: map[string][]int{"f1": {0}, "": {1}}, }, }, { @@ -127,7 +127,7 @@ func TestStructOf(t *testing.T) { {Name: "f1", Type: PrimitiveTypes.Int32}, {Name: "f2", Type: PrimitiveTypes.Int64}, }, - index: map[string][]int{"f1": []int{0}, "f2": []int{1}}, + index: map[string][]int{"f1": {0}, "f2": {1}}, }, }, { @@ -142,7 +142,7 @@ func TestStructOf(t *testing.T) { {Name: "f2", Type: PrimitiveTypes.Int64}, {Name: "f3", Type: ListOf(PrimitiveTypes.Float64)}, }, - index: map[string][]int{"f1": []int{0}, "f2": []int{1}, "f3": []int{2}}, + index: map[string][]int{"f1": {0}, "f2": {1}, "f3": {2}}, }, }, { @@ -157,7 +157,7 @@ func TestStructOf(t *testing.T) { {Name: "f2", Type: PrimitiveTypes.Int64}, {Name: "f3", Type: ListOf(ListOf(PrimitiveTypes.Float64))}, }, - index: map[string][]int{"f1": []int{0}, "f2": []int{1}, "f3": []int{2}}, + index: map[string][]int{"f1": {0}, "f2": {1}, "f3": {2}}, }, }, { @@ -172,7 +172,7 @@ func TestStructOf(t *testing.T) { {Name: "f2", Type: PrimitiveTypes.Int64}, {Name: "f3", Type: ListOf(ListOf(StructOf(Field{Name: "f1", Type: PrimitiveTypes.Float64})))}, }, - index: map[string][]int{"f1": []int{0}, "f2": []int{1}, "f3": []int{2}}, + index: map[string][]int{"f1": {0}, "f2": {1}, "f3": {2}}, }, }, { @@ -187,7 +187,7 @@ func TestStructOf(t *testing.T) { {Name: "f2", Type: PrimitiveTypes.Int64}, {Name: "f1", Type: PrimitiveTypes.Int64}, }, - index: map[string][]int{"f1": []int{0, 2}, 
"f2": []int{1}}, + index: map[string][]int{"f1": {0, 2}, "f2": {1}}, }, }, } { diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go index e594191c35fdf..c6794820dc172 100644 --- a/go/arrow/flight/flightsql/client.go +++ b/go/arrow/flight/flightsql/client.go @@ -1119,24 +1119,10 @@ func (p *PreparedStatement) Execute(ctx context.Context, opts ...grpc.CallOption return nil, err } - if p.hasBindParameters() { - pstream, err := p.client.Client.DoPut(ctx, opts...) - if err != nil { - return nil, err - } - wr, err := p.writeBindParameters(pstream, desc) - if err != nil { - return nil, err - } - if err = wr.Close(); err != nil { - return nil, err - } - pstream.CloseSend() - if err = p.captureDoPutPreparedStatementHandle(pstream); err != nil { - return nil, err - } + desc, err = p.bindParameters(ctx, desc, opts...) + if err != nil { + return nil, err } - return p.client.getFlightInfo(ctx, desc, opts...) } @@ -1156,23 +1142,9 @@ func (p *PreparedStatement) ExecutePut(ctx context.Context, opts ...grpc.CallOpt return err } - if p.hasBindParameters() { - pstream, err := p.client.Client.DoPut(ctx, opts...) - if err != nil { - return err - } - - wr, err := p.writeBindParameters(pstream, desc) - if err != nil { - return err - } - if err = wr.Close(); err != nil { - return err - } - pstream.CloseSend() - if err = p.captureDoPutPreparedStatementHandle(pstream); err != nil { - return err - } + _, err = p.bindParameters(ctx, desc, opts...) + if err != nil { + return err } return nil @@ -1200,23 +1172,9 @@ func (p *PreparedStatement) ExecutePoll(ctx context.Context, retryDescriptor *fl } if retryDescriptor == nil { - if p.hasBindParameters() { - pstream, err := p.client.Client.DoPut(ctx, opts...) - if err != nil { - return nil, err - } - - wr, err := p.writeBindParameters(pstream, desc) - if err != nil { - return nil, err - } - if err = wr.Close(); err != nil { - return nil, err - } - pstream.CloseSend() - if err = p.captureDoPutPreparedStatementHandle(pstream); err != nil { - return nil, err - } + desc, err = p.bindParameters(ctx, desc, opts...) + if err != nil { + return nil, err } } return p.client.Client.PollFlightInfo(ctx, desc, opts...) @@ -1248,7 +1206,7 @@ func (p *PreparedStatement) ExecuteUpdate(ctx context.Context, opts ...grpc.Call return } if p.hasBindParameters() { - wr, err = p.writeBindParameters(pstream, desc) + wr, err = p.writeBindParametersToStream(pstream, desc) if err != nil { return } @@ -1283,7 +1241,36 @@ func (p *PreparedStatement) hasBindParameters() bool { return (p.paramBinding != nil && p.paramBinding.NumRows() > 0) || (p.streamBinding != nil) } -func (p *PreparedStatement) writeBindParameters(pstream pb.FlightService_DoPutClient, desc *pb.FlightDescriptor) (*flight.Writer, error) { +func (p *PreparedStatement) bindParameters(ctx context.Context, desc *pb.FlightDescriptor, opts ...grpc.CallOption) (*flight.FlightDescriptor, error) { + if p.hasBindParameters() { + pstream, err := p.client.Client.DoPut(ctx, opts...) 
+		if err != nil {
+			return nil, err
+		}
+		wr, err := p.writeBindParametersToStream(pstream, desc)
+		if err != nil {
+			return nil, err
+		}
+		if err = wr.Close(); err != nil {
+			return nil, err
+		}
+		pstream.CloseSend()
+		if err = p.captureDoPutPreparedStatementHandle(pstream); err != nil {
+			return nil, err
+		}
+
+		cmd := pb.CommandPreparedStatementQuery{PreparedStatementHandle: p.handle}
+		desc, err = descForCommand(&cmd)
+		if err != nil {
+			return nil, err
+		}
+		return desc, nil
+	}
+	return desc, nil
+}
+
+// XXX: this does not capture the updated handle. Prefer bindParameters.
+func (p *PreparedStatement) writeBindParametersToStream(pstream pb.FlightService_DoPutClient, desc *pb.FlightDescriptor) (*flight.Writer, error) {
 	if p.paramBinding != nil {
 		wr := flight.NewRecordWriter(pstream, ipc.WithSchema(p.paramBinding.Schema()))
 		wr.SetFlightDescriptor(desc)
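The consolidated `bindParameters` helper means the descriptor used for the follow-up RPC reflects any prepared-statement handle the server returns through the DoPut result. A hedged sketch of the client-side flow this serves (assumes an already-connected `client *flightsql.Client` and an `arrow.Record` named `params` holding the bound values; identifiers are illustrative):

```go
// Parameters are shipped via DoPut when the statement executes; the server
// may hand back an updated handle, which Execute's GetFlightInfo descriptor
// then reflects, as exercised by the updated tests below.
prep, err := client.Prepare(ctx, "SELECT * FROM t WHERE id = ?")
if err != nil {
	return err
}
defer prep.Close(ctx)

prep.SetParameters(params)     // bound record batch; sent on Execute
info, err := prep.Execute(ctx) // DoPut first, then GetFlightInfo
if err != nil {
	return err
}
_ = info // fetch endpoints from info as usual
```
diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go
index 727fe02aa7063..33da79167c4ae 100644
--- a/go/arrow/flight/flightsql/client_test.go
+++ b/go/arrow/flight/flightsql/client_test.go
@@ -448,9 +448,9 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecuteParamBinding() {
 	expectedDesc := getDesc(&pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(handle)})
 
 	// mocked DoPut result
-	doPutPreparedStatementResult := &pb.DoPutPreparedStatementResult{PreparedStatementHandle: []byte(updatedHandle)}
+	doPutPreparedStatementResult := &pb.DoPutPreparedStatementResult{PreparedStatementHandle: []byte(updatedHandle)}
 	resdata, _ := proto.Marshal(doPutPreparedStatementResult)
-	putResult := &pb.PutResult{ AppMetadata: resdata }
+	putResult := &pb.PutResult{AppMetadata: resdata}
 
 	// mocked client stream for DoPut
 	mockedPut := &mockDoPutClient{}
@@ -461,7 +461,7 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecuteParamBinding() {
 	mockedPut.On("CloseSend").Return(nil)
 	mockedPut.On("Recv").Return(putResult, nil)
 
-	infoCmd := &pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(handle)}
+	infoCmd := &pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(updatedHandle)}
 	desc := getDesc(infoCmd)
 	s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil)
@@ -525,9 +525,9 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecuteReaderBinding() {
 	expectedDesc := getDesc(&pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(query)})
 
 	// mocked DoPut result
-	doPutPreparedStatementResult := &pb.DoPutPreparedStatementResult{PreparedStatementHandle: []byte(query)}
+	doPutPreparedStatementResult := &pb.DoPutPreparedStatementResult{PreparedStatementHandle: []byte(query)}
 	resdata, _ := proto.Marshal(doPutPreparedStatementResult)
-	putResult := &pb.PutResult{ AppMetadata: resdata }
+	putResult := &pb.PutResult{AppMetadata: resdata}
 
 	// mocked client stream for DoPut
 	mockedPut := &mockDoPutClient{}
diff --git a/go/arrow/flight/gen/flight/Flight.pb.go b/go/arrow/flight/gen/flight/Flight.pb.go
index d9477ee062fa8..ea35f469116ab 100644
--- a/go/arrow/flight/gen/flight/Flight.pb.go
+++ b/go/arrow/flight/gen/flight/Flight.pb.go
@@ -24,11 +24,12 @@
 package flight
 
 import (
+	reflect "reflect"
+	sync "sync"
+
 	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
 	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
 	timestamppb "google.golang.org/protobuf/types/known/timestamppb"
-	reflect "reflect"
-	sync "sync"
 )
 
 const (
diff --git a/go/arrow/flight/gen/flight/FlightSql.pb.go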
b/go/arrow/flight/gen/flight/FlightSql.pb.go
index 196c1d6b33643..f8f5e17d76bd2 100644
--- a/go/arrow/flight/gen/flight/FlightSql.pb.go
+++ b/go/arrow/flight/gen/flight/FlightSql.pb.go
@@ -24,11 +24,12 @@
 package flight
 
 import (
+	reflect "reflect"
+	sync "sync"
+
 	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
 	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
 	descriptorpb "google.golang.org/protobuf/types/descriptorpb"
-	reflect "reflect"
-	sync "sync"
 )
 
 const (
diff --git a/go/arrow/flight/gen/flight/Flight_grpc.pb.go b/go/arrow/flight/gen/flight/Flight_grpc.pb.go
index 11bbb00131ddb..da5601b46ab95 100644
--- a/go/arrow/flight/gen/flight/Flight_grpc.pb.go
+++ b/go/arrow/flight/gen/flight/Flight_grpc.pb.go
@@ -8,6 +8,7 @@
 package flight
 
 import (
 	context "context"
+
 	grpc "google.golang.org/grpc"
 	codes "google.golang.org/grpc/codes"
 	status "google.golang.org/grpc/status"
diff --git a/go/arrow/float16/float16.go b/go/arrow/float16/float16.go
index ecf5c9ddce9db..f61db40ef498c 100644
--- a/go/arrow/float16/float16.go
+++ b/go/arrow/float16/float16.go
@@ -175,7 +175,7 @@ func (n Num) Signbit() bool { return (n.bits & 0x8000) != 0 }
 
 func (n Num) IsNaN() bool { return (n.bits & 0x7fff) > 0x7c00 }
 
-func (n Num) IsInf() bool {return (n.bits & 0x7c00) == 0x7c00 }
+func (n Num) IsInf() bool { return (n.bits & 0x7c00) == 0x7c00 }
 
 func (n Num) IsZero() bool { return (n.bits & 0x7fff) == 0 }
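These predicates read directly off the IEEE 754 binary16 layout: 0x7c00 masks the five exponent bits, 0x8000 the sign bit, and 0x7fff everything but the sign. A small hedged sketch using only functions visible in this diff (the import path assumes the v17 module used elsewhere in this patch):

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v17/arrow/float16"
)

func main() {
	// IsInf checks only the exponent bits (0x7c00), ignoring the sign.
	fmt.Println(float16.Inf().IsInf())          // true
	fmt.Println(float16.Inf().Negate().IsInf()) // true: sign bit is masked out
	// IsNaN additionally requires a non-zero mantissa, hence the strict
	// comparison (bits & 0x7fff) > 0x7c00.
	fmt.Println(float16.NaN().IsNaN())    // true
	fmt.Println(float16.New(1.5).IsNaN()) // false: ordinary finite value
}
```
diff --git a/go/arrow/gen-flatbuffers.go b/go/arrow/gen-flatbuffers.go
index 5c8eba4a24757..720016e0bf168 100644
--- a/go/arrow/gen-flatbuffers.go
+++ b/go/arrow/gen-flatbuffers.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build ignore
 // +build ignore
 
 package main
diff --git a/go/arrow/internal/debug/assert_off.go b/go/arrow/internal/debug/assert_off.go
index 52b9a233169d2..1450ecc98a26e 100644
--- a/go/arrow/internal/debug/assert_off.go
+++ b/go/arrow/internal/debug/assert_off.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build !assert
 // +build !assert
 
 package debug
diff --git a/go/arrow/internal/debug/assert_on.go b/go/arrow/internal/debug/assert_on.go
index 2aa5d6ace4cf0..4a57169b31358 100644
--- a/go/arrow/internal/debug/assert_on.go
+++ b/go/arrow/internal/debug/assert_on.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build assert
 // +build assert
 
 package debug
diff --git a/go/arrow/internal/debug/doc.go b/go/arrow/internal/debug/doc.go
index 3ee1783ca4bda..094e427a22e09 100644
--- a/go/arrow/internal/debug/doc.go
+++ b/go/arrow/internal/debug/doc.go
@@ -17,14 +17,12 @@
 /*
 Package debug provides APIs for conditional runtime assertions and debug
 logging.
-
-Using Assert
+# Using Assert
 
 To enable runtime assertions, build with the assert tag. When the assert tag
 is omitted, the code for the assertion will be omitted from the binary.
-
-Using Log
+# Using Log
 
 To enable runtime debug logs, build with the debug tag. When the debug tag is
 omitted, the code for logging will be omitted from the binary.
diff --git a/go/arrow/internal/debug/log_off.go b/go/arrow/internal/debug/log_off.go
index 48da8e1ee94c7..760a5cdc0dc01 100644
--- a/go/arrow/internal/debug/log_off.go
+++ b/go/arrow/internal/debug/log_off.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.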
+//go:build !debug // +build !debug package debug diff --git a/go/arrow/internal/debug/log_on.go b/go/arrow/internal/debug/log_on.go index 99d0c8ae33fef..2588e7d1069f0 100644 --- a/go/arrow/internal/debug/log_on.go +++ b/go/arrow/internal/debug/log_on.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build debug // +build debug package debug diff --git a/go/arrow/internal/debug/util.go b/go/arrow/internal/debug/util.go index 7bd3d5389e669..ea4eba7fb5cb8 100644 --- a/go/arrow/internal/debug/util.go +++ b/go/arrow/internal/debug/util.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build debug || assert // +build debug assert package debug diff --git a/go/arrow/internal/flatbuf/Binary.go b/go/arrow/internal/flatbuf/Binary.go index e8018e74c4151..95e015595b548 100644 --- a/go/arrow/internal/flatbuf/Binary.go +++ b/go/arrow/internal/flatbuf/Binary.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Opaque binary data +// / Opaque binary data type Binary struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/BinaryView.go b/go/arrow/internal/flatbuf/BinaryView.go index 09ca5e7db9601..f6906674bdbc7 100644 --- a/go/arrow/internal/flatbuf/BinaryView.go +++ b/go/arrow/internal/flatbuf/BinaryView.go @@ -22,13 +22,13 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Logically the same as Binary, but the internal representation uses a view -/// struct that contains the string length and either the string's entire data -/// inline (for small strings) or an inlined prefix, an index of another buffer, -/// and an offset pointing to a slice in that buffer (for non-small strings). -/// -/// Since it uses a variable number of data buffers, each Field with this type -/// must have a corresponding entry in `variadicBufferCounts`. +// / Logically the same as Binary, but the internal representation uses a view +// / struct that contains the string length and either the string's entire data +// / inline (for small strings) or an inlined prefix, an index of another buffer, +// / and an offset pointing to a slice in that buffer (for non-small strings). +// / +// / Since it uses a variable number of data buffers, each Field with this type +// / must have a corresponding entry in `variadicBufferCounts`. 
type BinaryView struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Block.go b/go/arrow/internal/flatbuf/Block.go index 57a697b196883..8e33d3e641543 100644 --- a/go/arrow/internal/flatbuf/Block.go +++ b/go/arrow/internal/flatbuf/Block.go @@ -35,31 +35,34 @@ func (rcv *Block) Table() flatbuffers.Table { return rcv._tab.Table } -/// Index to the start of the RecordBlock (note this is past the Message header) +// / Index to the start of the RecordBlock (note this is past the Message header) func (rcv *Block) Offset() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } -/// Index to the start of the RecordBlock (note this is past the Message header) + +// / Index to the start of the RecordBlock (note this is past the Message header) func (rcv *Block) MutateOffset(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -/// Length of the metadata +// / Length of the metadata func (rcv *Block) MetaDataLength() int32 { return rcv._tab.GetInt32(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } -/// Length of the metadata + +// / Length of the metadata func (rcv *Block) MutateMetaDataLength(n int32) bool { return rcv._tab.MutateInt32(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } -/// Length of the data (this is aligned so there can be a gap between this and -/// the metadata). +// / Length of the data (this is aligned so there can be a gap between this and +// / the metadata). func (rcv *Block) BodyLength() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(16)) } -/// Length of the data (this is aligned so there can be a gap between this and -/// the metadata). + +// / Length of the data (this is aligned so there can be a gap between this and +// / the metadata). func (rcv *Block) MutateBodyLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(16), n) } diff --git a/go/arrow/internal/flatbuf/BodyCompression.go b/go/arrow/internal/flatbuf/BodyCompression.go index 6468e23135254..c23c29190216b 100644 --- a/go/arrow/internal/flatbuf/BodyCompression.go +++ b/go/arrow/internal/flatbuf/BodyCompression.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Optional compression for the memory buffers constituting IPC message -/// bodies. Intended for use with RecordBatch but could be used for other -/// message types +// / Optional compression for the memory buffers constituting IPC message +// / bodies. Intended for use with RecordBatch but could be used for other +// / message types type BodyCompression struct { _tab flatbuffers.Table } @@ -45,8 +45,8 @@ func (rcv *BodyCompression) Table() flatbuffers.Table { return rcv._tab } -/// Compressor library. -/// For LZ4_FRAME, each compressed buffer must consist of a single frame. +// / Compressor library. +// / For LZ4_FRAME, each compressed buffer must consist of a single frame. func (rcv *BodyCompression) Codec() CompressionType { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,13 +55,13 @@ func (rcv *BodyCompression) Codec() CompressionType { return 0 } -/// Compressor library. -/// For LZ4_FRAME, each compressed buffer must consist of a single frame. +// / Compressor library. +// / For LZ4_FRAME, each compressed buffer must consist of a single frame. 
func (rcv *BodyCompression) MutateCodec(n CompressionType) bool { return rcv._tab.MutateInt8Slot(4, int8(n)) } -/// Indicates the way the record batch body was compressed +// / Indicates the way the record batch body was compressed func (rcv *BodyCompression) Method() BodyCompressionMethod { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -70,7 +70,7 @@ func (rcv *BodyCompression) Method() BodyCompressionMethod { return 0 } -/// Indicates the way the record batch body was compressed +// / Indicates the way the record batch body was compressed func (rcv *BodyCompression) MutateMethod(n BodyCompressionMethod) bool { return rcv._tab.MutateInt8Slot(6, int8(n)) } diff --git a/go/arrow/internal/flatbuf/BodyCompressionMethod.go b/go/arrow/internal/flatbuf/BodyCompressionMethod.go index 108ab3e07fba6..bb7234b3989b5 100644 --- a/go/arrow/internal/flatbuf/BodyCompressionMethod.go +++ b/go/arrow/internal/flatbuf/BodyCompressionMethod.go @@ -20,9 +20,9 @@ package flatbuf import "strconv" -/// Provided for forward compatibility in case we need to support different -/// strategies for compressing the IPC message body (like whole-body -/// compression rather than buffer-level) in the future +// / Provided for forward compatibility in case we need to support different +// / strategies for compressing the IPC message body (like whole-body +// / compression rather than buffer-level) in the future type BodyCompressionMethod int8 const ( diff --git a/go/arrow/internal/flatbuf/Buffer.go b/go/arrow/internal/flatbuf/Buffer.go index eba8d99b28e9b..e650e06a57026 100644 --- a/go/arrow/internal/flatbuf/Buffer.go +++ b/go/arrow/internal/flatbuf/Buffer.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// A Buffer represents a single contiguous memory segment +// / ---------------------------------------------------------------------- +// / A Buffer represents a single contiguous memory segment type Buffer struct { _tab flatbuffers.Struct } @@ -37,30 +37,32 @@ func (rcv *Buffer) Table() flatbuffers.Table { return rcv._tab.Table } -/// The relative offset into the shared memory page where the bytes for this -/// buffer starts +// / The relative offset into the shared memory page where the bytes for this +// / buffer starts func (rcv *Buffer) Offset() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } -/// The relative offset into the shared memory page where the bytes for this -/// buffer starts + +// / The relative offset into the shared memory page where the bytes for this +// / buffer starts func (rcv *Buffer) MutateOffset(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -/// The absolute length (in bytes) of the memory buffer. The memory is found -/// from offset (inclusive) to offset + length (non-inclusive). When building -/// messages using the encapsulated IPC message, padding bytes may be written -/// after a buffer, but such padding bytes do not need to be accounted for in -/// the size here. +// / The absolute length (in bytes) of the memory buffer. The memory is found +// / from offset (inclusive) to offset + length (non-inclusive). When building +// / messages using the encapsulated IPC message, padding bytes may be written +// / after a buffer, but such padding bytes do not need to be accounted for in +// / the size here. 
func (rcv *Buffer) Length() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } -/// The absolute length (in bytes) of the memory buffer. The memory is found -/// from offset (inclusive) to offset + length (non-inclusive). When building -/// messages using the encapsulated IPC message, padding bytes may be written -/// after a buffer, but such padding bytes do not need to be accounted for in -/// the size here. + +// / The absolute length (in bytes) of the memory buffer. The memory is found +// / from offset (inclusive) to offset + length (non-inclusive). When building +// / messages using the encapsulated IPC message, padding bytes may be written +// / after a buffer, but such padding bytes do not need to be accounted for in +// / the size here. func (rcv *Buffer) MutateLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } diff --git a/go/arrow/internal/flatbuf/Date.go b/go/arrow/internal/flatbuf/Date.go index 32983ec54ccc2..985a8f79955a4 100644 --- a/go/arrow/internal/flatbuf/Date.go +++ b/go/arrow/internal/flatbuf/Date.go @@ -22,12 +22,12 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Date is either a 32-bit or 64-bit signed integer type representing an -/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units: -/// -/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no -/// leap seconds), where the values are evenly divisible by 86400000 -/// * Days (32 bits) since the UNIX epoch +// / Date is either a 32-bit or 64-bit signed integer type representing an +// / elapsed time since UNIX epoch (1970-01-01), stored in either of two units: +// / +// / * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no +// / leap seconds), where the values are evenly divisible by 86400000 +// / * Days (32 bits) since the UNIX epoch type Date struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Decimal.go b/go/arrow/internal/flatbuf/Decimal.go index c9de254d1dcbd..2fc9d5ad6586c 100644 --- a/go/arrow/internal/flatbuf/Decimal.go +++ b/go/arrow/internal/flatbuf/Decimal.go @@ -22,10 +22,10 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Exact decimal value represented as an integer value in two's -/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers -/// are used. The representation uses the endianness indicated -/// in the Schema. +// / Exact decimal value represented as an integer value in two's +// / complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers +// / are used. The representation uses the endianness indicated +// / in the Schema. type Decimal struct { _tab flatbuffers.Table } @@ -46,7 +46,7 @@ func (rcv *Decimal) Table() flatbuffers.Table { return rcv._tab } -/// Total number of decimal digits +// / Total number of decimal digits func (rcv *Decimal) Precision() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,12 +55,12 @@ func (rcv *Decimal) Precision() int32 { return 0 } -/// Total number of decimal digits +// / Total number of decimal digits func (rcv *Decimal) MutatePrecision(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } -/// Number of digits after the decimal point "." +// / Number of digits after the decimal point "." func (rcv *Decimal) Scale() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -69,13 +69,13 @@ func (rcv *Decimal) Scale() int32 { return 0 } -/// Number of digits after the decimal point "." 
+// / Number of digits after the decimal point "." func (rcv *Decimal) MutateScale(n int32) bool { return rcv._tab.MutateInt32Slot(6, n) } -/// Number of bits per value. The only accepted widths are 128 and 256. -/// We use bitWidth for consistency with Int::bitWidth. +// / Number of bits per value. The only accepted widths are 128 and 256. +// / We use bitWidth for consistency with Int::bitWidth. func (rcv *Decimal) BitWidth() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -84,8 +84,8 @@ func (rcv *Decimal) BitWidth() int32 { return 128 } -/// Number of bits per value. The only accepted widths are 128 and 256. -/// We use bitWidth for consistency with Int::bitWidth. +// / Number of bits per value. The only accepted widths are 128 and 256. +// / We use bitWidth for consistency with Int::bitWidth. func (rcv *Decimal) MutateBitWidth(n int32) bool { return rcv._tab.MutateInt32Slot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryBatch.go b/go/arrow/internal/flatbuf/DictionaryBatch.go index 25b5384e46a5c..999c5fda46384 100644 --- a/go/arrow/internal/flatbuf/DictionaryBatch.go +++ b/go/arrow/internal/flatbuf/DictionaryBatch.go @@ -22,12 +22,12 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// For sending dictionary encoding information. Any Field can be -/// dictionary-encoded, but in this case none of its children may be -/// dictionary-encoded. -/// There is one vector / column per dictionary, but that vector / column -/// may be spread across multiple dictionary batches by using the isDelta -/// flag +// / For sending dictionary encoding information. Any Field can be +// / dictionary-encoded, but in this case none of its children may be +// / dictionary-encoded. +// / There is one vector / column per dictionary, but that vector / column +// / may be spread across multiple dictionary batches by using the isDelta +// / flag type DictionaryBatch struct { _tab flatbuffers.Table } @@ -73,9 +73,9 @@ func (rcv *DictionaryBatch) Data(obj *RecordBatch) *RecordBatch { return nil } -/// If isDelta is true the values in the dictionary are to be appended to a -/// dictionary with the indicated id. If isDelta is false this dictionary -/// should replace the existing dictionary. +// / If isDelta is true the values in the dictionary are to be appended to a +// / dictionary with the indicated id. If isDelta is false this dictionary +// / should replace the existing dictionary. func (rcv *DictionaryBatch) IsDelta() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -84,9 +84,9 @@ func (rcv *DictionaryBatch) IsDelta() bool { return false } -/// If isDelta is true the values in the dictionary are to be appended to a -/// dictionary with the indicated id. If isDelta is false this dictionary -/// should replace the existing dictionary. +// / If isDelta is true the values in the dictionary are to be appended to a +// / dictionary with the indicated id. If isDelta is false this dictionary +// / should replace the existing dictionary. func (rcv *DictionaryBatch) MutateIsDelta(n bool) bool { return rcv._tab.MutateBoolSlot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryEncoding.go b/go/arrow/internal/flatbuf/DictionaryEncoding.go index a9b09530b2a52..44c3874219f1c 100644 --- a/go/arrow/internal/flatbuf/DictionaryEncoding.go +++ b/go/arrow/internal/flatbuf/DictionaryEncoding.go @@ -42,9 +42,9 @@ func (rcv *DictionaryEncoding) Table() flatbuffers.Table { return rcv._tab } -/// The known dictionary id in the application where this data is used. 
In -/// the file or streaming formats, the dictionary ids are found in the -/// DictionaryBatch messages +// / The known dictionary id in the application where this data is used. In +// / the file or streaming formats, the dictionary ids are found in the +// / DictionaryBatch messages func (rcv *DictionaryEncoding) Id() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -53,18 +53,18 @@ func (rcv *DictionaryEncoding) Id() int64 { return 0 } -/// The known dictionary id in the application where this data is used. In -/// the file or streaming formats, the dictionary ids are found in the -/// DictionaryBatch messages +// / The known dictionary id in the application where this data is used. In +// / the file or streaming formats, the dictionary ids are found in the +// / DictionaryBatch messages func (rcv *DictionaryEncoding) MutateId(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -/// The dictionary indices are constrained to be non-negative integers. If -/// this field is null, the indices must be signed int32. To maximize -/// cross-language compatibility and performance, implementations are -/// recommended to prefer signed integer types over unsigned integer types -/// and to avoid uint64 indices unless they are required by an application. +// / The dictionary indices are constrained to be non-negative integers. If +// / this field is null, the indices must be signed int32. To maximize +// / cross-language compatibility and performance, implementations are +// / recommended to prefer signed integer types over unsigned integer types +// / and to avoid uint64 indices unless they are required by an application. func (rcv *DictionaryEncoding) IndexType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -78,15 +78,15 @@ func (rcv *DictionaryEncoding) IndexType(obj *Int) *Int { return nil } -/// The dictionary indices are constrained to be non-negative integers. If -/// this field is null, the indices must be signed int32. To maximize -/// cross-language compatibility and performance, implementations are -/// recommended to prefer signed integer types over unsigned integer types -/// and to avoid uint64 indices unless they are required by an application. -/// By default, dictionaries are not ordered, or the order does not have -/// semantic meaning. In some statistical, applications, dictionary-encoding -/// is used to represent ordered categorical data, and we provide a way to -/// preserve that metadata here +// / The dictionary indices are constrained to be non-negative integers. If +// / this field is null, the indices must be signed int32. To maximize +// / cross-language compatibility and performance, implementations are +// / recommended to prefer signed integer types over unsigned integer types +// / and to avoid uint64 indices unless they are required by an application. +// / By default, dictionaries are not ordered, or the order does not have +// / semantic meaning. In some statistical applications, dictionary-encoding +// / is used to represent ordered categorical data, and we provide a way to +// / preserve that metadata here func (rcv *DictionaryEncoding) IsOrdered() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -95,10 +95,10 @@ func (rcv *DictionaryEncoding) IsOrdered() bool { return false } -/// By default, dictionaries are not ordered, or the order does not have -/// semantic meaning. 
In some statistical, applications, dictionary-encoding -/// is used to represent ordered categorical data, and we provide a way to -/// preserve that metadata here +// / By default, dictionaries are not ordered, or the order does not have +// / semantic meaning. In some statistical applications, dictionary-encoding +// / is used to represent ordered categorical data, and we provide a way to +// / preserve that metadata here func (rcv *DictionaryEncoding) MutateIsOrdered(n bool) bool { return rcv._tab.MutateBoolSlot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryKind.go b/go/arrow/internal/flatbuf/DictionaryKind.go index 126ba5f7f6bb0..6825100515612 100644 --- a/go/arrow/internal/flatbuf/DictionaryKind.go +++ b/go/arrow/internal/flatbuf/DictionaryKind.go @@ -20,11 +20,11 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// Dictionary encoding metadata -/// Maintained for forwards compatibility, in the future -/// Dictionaries might be explicit maps between integers and values -/// allowing for non-contiguous index values +// / ---------------------------------------------------------------------- +// / Dictionary encoding metadata +// / Maintained for forwards compatibility, in the future +// / Dictionaries might be explicit maps between integers and values +// / allowing for non-contiguous index values type DictionaryKind int16 const ( diff --git a/go/arrow/internal/flatbuf/Endianness.go b/go/arrow/internal/flatbuf/Endianness.go index cefa2ff9c06ed..c9619b7b0d978 100644 --- a/go/arrow/internal/flatbuf/Endianness.go +++ b/go/arrow/internal/flatbuf/Endianness.go @@ -20,8 +20,8 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// Endianness of the platform producing the data +// / ---------------------------------------------------------------------- +// / Endianness of the platform producing the data type Endianness int16 const ( diff --git a/go/arrow/internal/flatbuf/Feature.go b/go/arrow/internal/flatbuf/Feature.go index ae5a0398b607d..2204c440ed4fe 100644 --- a/go/arrow/internal/flatbuf/Feature.go +++ b/go/arrow/internal/flatbuf/Feature.go @@ -20,35 +20,35 @@ package flatbuf import "strconv" -/// Represents Arrow Features that might not have full support -/// within implementations. This is intended to be used in -/// two scenarios: -/// 1. A mechanism for readers of Arrow Streams -/// and files to understand that the stream or file makes -/// use of a feature that isn't supported or unknown to -/// the implementation (and therefore can meet the Arrow -/// forward compatibility guarantees). -/// 2. A means of negotiating between a client and server -/// what features a stream is allowed to use. The enums -/// values here are intented to represent higher level -/// features, additional details maybe negotiated -/// with key-value pairs specific to the protocol. -/// -/// Enums added to this list should be assigned power-of-two values -/// to facilitate exchanging and comparing bitmaps for supported -/// features. +// / Represents Arrow Features that might not have full support +// / within implementations. This is intended to be used in +// / two scenarios: +// / 1. A mechanism for readers of Arrow Streams +// / and files to understand that the stream or file makes +// / use of a feature that isn't supported or unknown to +// / the implementation (and therefore can meet the Arrow +// / forward compatibility guarantees). +// / 2. 
A means of negotiating between a client and server +// / what features a stream is allowed to use. The enum +// / values here are intended to represent higher level +// / features; additional details may be negotiated +// / with key-value pairs specific to the protocol. +// / +// / Enums added to this list should be assigned power-of-two values +// / to facilitate exchanging and comparing bitmaps for supported +// / features. type Feature int64 const ( /// Needed to make flatbuffers happy. - FeatureUNUSED Feature = 0 + FeatureUNUSED Feature = 0 /// The stream makes use of multiple full dictionaries with the /// same ID and assumes clients implement dictionary replacement /// correctly. FeatureDICTIONARY_REPLACEMENT Feature = 1 /// The stream makes use of compressed bodies as described /// in Message.fbs. - FeatureCOMPRESSED_BODY Feature = 2 + FeatureCOMPRESSED_BODY Feature = 2 ) var EnumNamesFeature = map[Feature]string{ diff --git a/go/arrow/internal/flatbuf/Field.go b/go/arrow/internal/flatbuf/Field.go index c03cf2f878b6f..8aed29bc48137 100644 --- a/go/arrow/internal/flatbuf/Field.go +++ b/go/arrow/internal/flatbuf/Field.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// A field represents a named column in a record / row batch or child of a -/// nested type. +// / ---------------------------------------------------------------------- +// / A field represents a named column in a record / row batch or child of a +// / nested type. type Field struct { _tab flatbuffers.Table } @@ -45,7 +45,7 @@ func (rcv *Field) Table() flatbuffers.Table { return rcv._tab } -/// Name is not required, in i.e. a List +// / Name is not required, e.g. in a List func (rcv *Field) Name() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -54,8 +54,8 @@ func (rcv *Field) Name() []byte { return nil } -/// Name is not required, in i.e. a List -/// Whether or not this field can contain nulls. Should be true in general. +// / Name is not required, e.g. in a List +// / Whether or not this field can contain nulls. Should be true in general. func (rcv *Field) Nullable() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -64,7 +64,7 @@ func (rcv *Field) Nullable() bool { return false } -/// Whether or not this field can contain nulls. Should be true in general. +// / Whether or not this field can contain nulls. Should be true in general. func (rcv *Field) MutateNullable(n bool) bool { return rcv._tab.MutateBoolSlot(6, n) } @@ -81,7 +81,7 @@ func (rcv *Field) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(8, byte(n)) } -/// This is the type of the decoded value if the field is dictionary encoded. +// / This is the type of the decoded value if the field is dictionary encoded. func (rcv *Field) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -91,8 +91,8 @@ func (rcv *Field) Type(obj *flatbuffers.Table) bool { return false } -/// This is the type of the decoded value if the field is dictionary encoded. -/// Present only if the field is dictionary encoded. +// / This is the type of the decoded value if the field is dictionary encoded. +// / Present only if the field is dictionary encoded.
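The Field table above is what Arrow's public Go API populates when you declare a schema. A sketch of the user-facing side, assuming the github.com/apache/arrow/go module (the /v16 major-version path is an assumption and varies by release):

```go
// A named, nullable column in a schema, corresponding to the flatbuffer
// Field metadata (name, nullable, type) documented above.
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v16/arrow"
)

func main() {
	f := arrow.Field{Name: "amount", Type: arrow.PrimitiveTypes.Int64, Nullable: true}
	schema := arrow.NewSchema([]arrow.Field{f}, nil)
	fmt.Println(schema) // one nullable int64 column named "amount"
}
```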
func (rcv *Field) Dictionary(obj *DictionaryEncoding) *DictionaryEncoding { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -106,9 +106,9 @@ func (rcv *Field) Dictionary(obj *DictionaryEncoding) *DictionaryEncoding { return nil } -/// Present only if the field is dictionary encoded. -/// children apply only to nested data types like Struct, List and Union. For -/// primitive types children will have length 0. +// / Present only if the field is dictionary encoded. +// / children apply only to nested data types like Struct, List and Union. For +// / primitive types children will have length 0. func (rcv *Field) Children(obj *Field, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(14)) if o != 0 { @@ -129,9 +129,9 @@ func (rcv *Field) ChildrenLength() int { return 0 } -/// children apply only to nested data types like Struct, List and Union. For -/// primitive types children will have length 0. -/// User-defined metadata +// / children apply only to nested data types like Struct, List and Union. For +// / primitive types children will have length 0. +// / User-defined metadata func (rcv *Field) CustomMetadata(obj *KeyValue, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(16)) if o != 0 { @@ -152,7 +152,7 @@ func (rcv *Field) CustomMetadataLength() int { return 0 } -/// User-defined metadata +// / User-defined metadata func FieldStart(builder *flatbuffers.Builder) { builder.StartObject(7) } diff --git a/go/arrow/internal/flatbuf/FieldNode.go b/go/arrow/internal/flatbuf/FieldNode.go index 606b30bfebbd2..0e258a3d2cde8 100644 --- a/go/arrow/internal/flatbuf/FieldNode.go +++ b/go/arrow/internal/flatbuf/FieldNode.go @@ -22,15 +22,15 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// Data structures for describing a table row batch (a collection of -/// equal-length Arrow arrays) -/// Metadata about a field at some level of a nested type tree (but not -/// its children). -/// -/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]` -/// would have {length: 5, null_count: 2} for its List node, and {length: 6, -/// null_count: 0} for its Int16 node, as separate FieldNode structs +// / ---------------------------------------------------------------------- +// / Data structures for describing a table row batch (a collection of +// / equal-length Arrow arrays) +// / Metadata about a field at some level of a nested type tree (but not +// / its children). +// / +// / For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]` +// / would have {length: 5, null_count: 2} for its List node, and {length: 6, +// / null_count: 0} for its Int16 node, as separate FieldNode structs type FieldNode struct { _tab flatbuffers.Struct } @@ -44,26 +44,28 @@ func (rcv *FieldNode) Table() flatbuffers.Table { return rcv._tab.Table } -/// The number of value slots in the Arrow array at this level of a nested -/// tree +// / The number of value slots in the Arrow array at this level of a nested +// / tree func (rcv *FieldNode) Length() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } -/// The number of value slots in the Arrow array at this level of a nested -/// tree + +// / The number of value slots in the Arrow array at this level of a nested +// / tree func (rcv *FieldNode) MutateLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -/// The number of observed nulls. 
Fields with null_count == 0 may choose not -/// to write their physical validity bitmap out as a materialized buffer, -/// instead setting the length of the bitmap buffer to 0. +// / The number of observed nulls. Fields with null_count == 0 may choose not +// / to write their physical validity bitmap out as a materialized buffer, +// / instead setting the length of the bitmap buffer to 0. func (rcv *FieldNode) NullCount() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } -/// The number of observed nulls. Fields with null_count == 0 may choose not -/// to write their physical validity bitmap out as a materialized buffer, -/// instead setting the length of the bitmap buffer to 0. + +// / The number of observed nulls. Fields with null_count == 0 may choose not +// / to write their physical validity bitmap out as a materialized buffer, +// / instead setting the length of the bitmap buffer to 0. func (rcv *FieldNode) MutateNullCount(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } diff --git a/go/arrow/internal/flatbuf/FixedSizeBinary.go b/go/arrow/internal/flatbuf/FixedSizeBinary.go index 4e660d5077f71..2725dfb90b966 100644 --- a/go/arrow/internal/flatbuf/FixedSizeBinary.go +++ b/go/arrow/internal/flatbuf/FixedSizeBinary.go @@ -42,7 +42,7 @@ func (rcv *FixedSizeBinary) Table() flatbuffers.Table { return rcv._tab } -/// Number of bytes per value +// / Number of bytes per value func (rcv *FixedSizeBinary) ByteWidth() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -51,7 +51,7 @@ func (rcv *FixedSizeBinary) ByteWidth() int32 { return 0 } -/// Number of bytes per value +// / Number of bytes per value func (rcv *FixedSizeBinary) MutateByteWidth(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } diff --git a/go/arrow/internal/flatbuf/FixedSizeList.go b/go/arrow/internal/flatbuf/FixedSizeList.go index dabf5cc8581da..534ca27f2fe21 100644 --- a/go/arrow/internal/flatbuf/FixedSizeList.go +++ b/go/arrow/internal/flatbuf/FixedSizeList.go @@ -42,7 +42,7 @@ func (rcv *FixedSizeList) Table() flatbuffers.Table { return rcv._tab } -/// Number of list items per value +// / Number of list items per value func (rcv *FixedSizeList) ListSize() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -51,7 +51,7 @@ func (rcv *FixedSizeList) ListSize() int32 { return 0 } -/// Number of list items per value +// / Number of list items per value func (rcv *FixedSizeList) MutateListSize(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } diff --git a/go/arrow/internal/flatbuf/Footer.go b/go/arrow/internal/flatbuf/Footer.go index 65b0ff0954614..d65af41e7f62e 100644 --- a/go/arrow/internal/flatbuf/Footer.go +++ b/go/arrow/internal/flatbuf/Footer.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// Arrow File metadata -/// +// / ---------------------------------------------------------------------- +// / Arrow File metadata +// / type Footer struct { _tab flatbuffers.Table } @@ -108,7 +108,7 @@ func (rcv *Footer) RecordBatchesLength() int { return 0 } -/// User-defined metadata +// / User-defined metadata func (rcv *Footer) CustomMetadata(obj *KeyValue, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -129,7 +129,7 @@ func (rcv *Footer) CustomMetadataLength() int { return 0 } -/// User-defined metadata +// / User-defined metadata func FooterStart(builder *flatbuffers.Builder) { 
builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/KeyValue.go b/go/arrow/internal/flatbuf/KeyValue.go index c1b85318ecd5f..0cd5dc62923e3 100644 --- a/go/arrow/internal/flatbuf/KeyValue.go +++ b/go/arrow/internal/flatbuf/KeyValue.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// user defined key value pairs to add custom metadata to arrow -/// key namespacing is the responsibility of the user +// / ---------------------------------------------------------------------- +// / user defined key value pairs to add custom metadata to arrow +// / key namespacing is the responsibility of the user type KeyValue struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeBinary.go b/go/arrow/internal/flatbuf/LargeBinary.go index 2c3befcc16fb9..b25ecc41aff51 100644 --- a/go/arrow/internal/flatbuf/LargeBinary.go +++ b/go/arrow/internal/flatbuf/LargeBinary.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as Binary, but with 64-bit offsets, allowing to represent -/// extremely large data values. +// / Same as Binary, but with 64-bit offsets, allowing to represent +// / extremely large data values. type LargeBinary struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeList.go b/go/arrow/internal/flatbuf/LargeList.go index 92f2284587445..d8bfb9c07df76 100644 --- a/go/arrow/internal/flatbuf/LargeList.go +++ b/go/arrow/internal/flatbuf/LargeList.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as List, but with 64-bit offsets, allowing to represent -/// extremely large data values. +// / Same as List, but with 64-bit offsets, allowing to represent +// / extremely large data values. type LargeList struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeListView.go b/go/arrow/internal/flatbuf/LargeListView.go index 5b1df149cd1e2..4608c1dec53d8 100644 --- a/go/arrow/internal/flatbuf/LargeListView.go +++ b/go/arrow/internal/flatbuf/LargeListView.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent -/// extremely large data values. +// / Same as ListView, but with 64-bit offsets and sizes, allowing to represent +// / extremely large data values. type LargeListView struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeUtf8.go b/go/arrow/internal/flatbuf/LargeUtf8.go index e78b33e110066..4478fed856e6d 100644 --- a/go/arrow/internal/flatbuf/LargeUtf8.go +++ b/go/arrow/internal/flatbuf/LargeUtf8.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as Utf8, but with 64-bit offsets, allowing to represent -/// extremely large data values. +// / Same as Utf8, but with 64-bit offsets, allowing to represent +// / extremely large data values. type LargeUtf8 struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/ListView.go b/go/arrow/internal/flatbuf/ListView.go index 46b1e0b3cbf2f..cde43cf5b6893 100644 --- a/go/arrow/internal/flatbuf/ListView.go +++ b/go/arrow/internal/flatbuf/ListView.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Represents the same logical types that List can, but contains offsets and -/// sizes allowing for writes in any order and sharing of child values among -/// list values. 
+// / Represents the same logical types that List can, but contains offsets and +// / sizes allowing for writes in any order and sharing of child values among +// / list values. type ListView struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Map.go b/go/arrow/internal/flatbuf/Map.go index 8802aba1ebd39..d4871e558199f 100644 --- a/go/arrow/internal/flatbuf/Map.go +++ b/go/arrow/internal/flatbuf/Map.go @@ -22,31 +22,31 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A Map is a logical nested type that is represented as -/// -/// List<entries: Struct<key: K, value: V>> -/// -/// In this layout, the keys and values are each respectively contiguous. We do -/// not constrain the key and value types, so the application is responsible -/// for ensuring that the keys are hashable and unique. Whether the keys are sorted -/// may be set in the metadata for this field. -/// -/// In a field with Map type, the field has a child Struct field, which then -/// has two children: key type and the second the value type. The names of the -/// child fields may be respectively "entries", "key", and "value", but this is -/// not enforced. -/// -/// Map -/// ```text -/// - child[0] entries: Struct -/// - child[0] key: K -/// - child[1] value: V -/// ``` -/// Neither the "entries" field nor the "key" field may be nullable. -/// -/// The metadata is structured so that Arrow systems without special handling -/// for Map can make Map an alias for List. The "layout" attribute for the Map -/// field must have the same contents as a List. +// / A Map is a logical nested type that is represented as +// / +// / List<entries: Struct<key: K, value: V>> +// / +// / In this layout, the keys and values are each respectively contiguous. We do +// / not constrain the key and value types, so the application is responsible +// / for ensuring that the keys are hashable and unique. Whether the keys are sorted +// / may be set in the metadata for this field. +// / +// / In a field with Map type, the field has a child Struct field, which then +// / has two children: the first the key type and the second the value type. The names of the +// / child fields may be respectively "entries", "key", and "value", but this is +// / not enforced. +// / +// / Map +// / ```text +// / - child[0] entries: Struct +// / - child[0] key: K +// / - child[1] value: V +// / ``` +// / Neither the "entries" field nor the "key" field may be nullable. +// / +// / The metadata is structured so that Arrow systems without special handling +// / for Map can make Map an alias for List. The "layout" attribute for the Map +// / field must have the same contents as a List.
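Arrow's public Go API builds exactly the List<entries: Struct<key, value>> layout described above; a sketch assuming the /v16 module path and the MapType accessors of recent releases (both assumptions, so treat this as illustrative):

```go
// arrow.MapOf constructs the Map logical type; the key/value children match
// the "key" and "value" fields of the entries struct in the layout above.
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v16/arrow"
)

func main() {
	m := arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32)
	fmt.Println(m)            // a map type with utf8 keys and int32 values
	fmt.Println(m.KeyType())  // utf8  (the "key" child; never nullable)
	fmt.Println(m.ItemType()) // int32 (the "value" child)
}
```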
type Map struct { _tab flatbuffers.Table } @@ -67,7 +67,7 @@ func (rcv *Map) Table() flatbuffers.Table { return rcv._tab } -/// Set to true if the keys within each value are sorted +// / Set to true if the keys within each value are sorted func (rcv *Map) KeysSorted() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -76,7 +76,7 @@ func (rcv *Map) KeysSorted() bool { return false } -/// Set to true if the keys within each value are sorted +// / Set to true if the keys within each value are sorted func (rcv *Map) MutateKeysSorted(n bool) bool { return rcv._tab.MutateBoolSlot(4, n) } diff --git a/go/arrow/internal/flatbuf/MessageHeader.go b/go/arrow/internal/flatbuf/MessageHeader.go index c12fc1058119d..d7f9907c7a7a2 100644 --- a/go/arrow/internal/flatbuf/MessageHeader.go +++ b/go/arrow/internal/flatbuf/MessageHeader.go @@ -20,14 +20,14 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// The root Message type -/// This union enables us to easily send different message types without -/// redundant storage, and in the future we can easily add new message types. -/// -/// Arrow implementations do not need to implement all of the message types, -/// which may include experimental metadata types. For maximum compatibility, -/// it is best to send data using RecordBatch +// / ---------------------------------------------------------------------- +// / The root Message type +// / This union enables us to easily send different message types without +// / redundant storage, and in the future we can easily add new message types. +// / +// / Arrow implementations do not need to implement all of the message types, +// / which may include experimental metadata types. For maximum compatibility, +// / it is best to send data using RecordBatch type MessageHeader byte const ( diff --git a/go/arrow/internal/flatbuf/Null.go b/go/arrow/internal/flatbuf/Null.go index 3c3eb4bda3619..3b93a1b6ee965 100644 --- a/go/arrow/internal/flatbuf/Null.go +++ b/go/arrow/internal/flatbuf/Null.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// These are stored in the flatbuffer in the Type union below +// / These are stored in the flatbuffer in the Type union below type Null struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/RecordBatch.go b/go/arrow/internal/flatbuf/RecordBatch.go index c50f4a6e868ea..52c72a8a20ae4 100644 --- a/go/arrow/internal/flatbuf/RecordBatch.go +++ b/go/arrow/internal/flatbuf/RecordBatch.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A data header describing the shared memory layout of a "record" or "row" -/// batch. Some systems call this a "row batch" internally and others a "record -/// batch". +// / A data header describing the shared memory layout of a "record" or "row" +// / batch. Some systems call this a "row batch" internally and others a "record +// / batch". type RecordBatch struct { _tab flatbuffers.Table } @@ -45,8 +45,8 @@ func (rcv *RecordBatch) Table() flatbuffers.Table { return rcv._tab } -/// number of records / rows. The arrays in the batch should all have this -/// length +// / number of records / rows. The arrays in the batch should all have this +// / length func (rcv *RecordBatch) Length() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,13 +55,13 @@ func (rcv *RecordBatch) Length() int64 { return 0 } -/// number of records / rows. 
The arrays in the batch should all have this -/// length +// / number of records / rows. The arrays in the batch should all have this +// / length func (rcv *RecordBatch) MutateLength(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -/// Nodes correspond to the pre-ordered flattened logical schema +// / Nodes correspond to the pre-ordered flattened logical schema func (rcv *RecordBatch) Nodes(obj *FieldNode, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -81,13 +81,13 @@ func (rcv *RecordBatch) NodesLength() int { return 0 } -/// Nodes correspond to the pre-ordered flattened logical schema -/// Buffers correspond to the pre-ordered flattened buffer tree -/// -/// The number of buffers appended to this list depends on the schema. For -/// example, most primitive arrays will have 2 buffers, 1 for the validity -/// bitmap and 1 for the values. For struct arrays, there will only be a -/// single buffer for the validity (nulls) bitmap +// / Nodes correspond to the pre-ordered flattened logical schema +// / Buffers correspond to the pre-ordered flattened buffer tree +// / +// / The number of buffers appended to this list depends on the schema. For +// / example, most primitive arrays will have 2 buffers, 1 for the validity +// / bitmap and 1 for the values. For struct arrays, there will only be a +// / single buffer for the validity (nulls) bitmap func (rcv *RecordBatch) Buffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -107,13 +107,13 @@ func (rcv *RecordBatch) BuffersLength() int { return 0 } -/// Buffers correspond to the pre-ordered flattened buffer tree -/// -/// The number of buffers appended to this list depends on the schema. For -/// example, most primitive arrays will have 2 buffers, 1 for the validity -/// bitmap and 1 for the values. For struct arrays, there will only be a -/// single buffer for the validity (nulls) bitmap -/// Optional compression of the message body +// / Buffers correspond to the pre-ordered flattened buffer tree +// / +// / The number of buffers appended to this list depends on the schema. For +// / example, most primitive arrays will have 2 buffers, 1 for the validity +// / bitmap and 1 for the values. For struct arrays, there will only be a +// / single buffer for the validity (nulls) bitmap +// / Optional compression of the message body func (rcv *RecordBatch) Compression(obj *BodyCompression) *BodyCompression { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -127,21 +127,21 @@ func (rcv *RecordBatch) Compression(obj *BodyCompression) *BodyCompression { return nil } -/// Optional compression of the message body -/// Some types such as Utf8View are represented using a variable number of buffers. -/// For each such Field in the pre-ordered flattened logical schema, there will be -/// an entry in variadicBufferCounts to indicate the number of number of variadic -/// buffers which belong to that Field in the current RecordBatch. -/// -/// For example, the schema -/// col1: Struct -/// col2: Utf8View -/// contains two Fields with variadic buffers so variadicBufferCounts will have -/// two entries, the first counting the variadic buffers of `col1.beta` and the -/// second counting `col2`'s. -/// -/// This field may be omitted if and only if the schema contains no Fields with -/// a variable number of buffers, such as BinaryView and Utf8View. +// / Optional compression of the message body +// / Some types such as Utf8View are represented using a variable number of buffers. 
+// / For each such Field in the pre-ordered flattened logical schema, there will be +// / an entry in variadicBufferCounts to indicate the number of variadic +// / buffers which belong to that Field in the current RecordBatch. +// / +// / For example, the schema +// / col1: Struct +// / col2: Utf8View +// / contains two Fields with variadic buffers so variadicBufferCounts will have +// / two entries, the first counting the variadic buffers of `col1.beta` and the +// / second counting `col2`'s. +// / +// / This field may be omitted if and only if the schema contains no Fields with +// / a variable number of buffers, such as BinaryView and Utf8View. func (rcv *RecordBatch) VariadicBufferCounts(j int) int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -159,20 +159,20 @@ func (rcv *RecordBatch) VariadicBufferCountsLength() int { return 0 } -/// Some types such as Utf8View are represented using a variable number of buffers. -/// For each such Field in the pre-ordered flattened logical schema, there will be -/// an entry in variadicBufferCounts to indicate the number of number of variadic -/// buffers which belong to that Field in the current RecordBatch. -/// -/// For example, the schema -/// col1: Struct -/// col2: Utf8View -/// contains two Fields with variadic buffers so variadicBufferCounts will have -/// two entries, the first counting the variadic buffers of `col1.beta` and the -/// second counting `col2`'s. -/// -/// This field may be omitted if and only if the schema contains no Fields with -/// a variable number of buffers, such as BinaryView and Utf8View. +// / Some types such as Utf8View are represented using a variable number of buffers. +// / For each such Field in the pre-ordered flattened logical schema, there will be +// / an entry in variadicBufferCounts to indicate the number of variadic +// / buffers which belong to that Field in the current RecordBatch. +// / +// / For example, the schema +// / col1: Struct +// / col2: Utf8View +// / contains two Fields with variadic buffers so variadicBufferCounts will have +// / two entries, the first counting the variadic buffers of `col1.beta` and the +// / second counting `col2`'s. +// / +// / This field may be omitted if and only if the schema contains no Fields with +// / a variable number of buffers, such as BinaryView and Utf8View. func (rcv *RecordBatch) MutateVariadicBufferCounts(j int, n int64) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { diff --git a/go/arrow/internal/flatbuf/RunEndEncoded.go b/go/arrow/internal/flatbuf/RunEndEncoded.go index fa414c1bf0eed..b88460b2e22bc 100644 --- a/go/arrow/internal/flatbuf/RunEndEncoded.go +++ b/go/arrow/internal/flatbuf/RunEndEncoded.go @@ -22,11 +22,11 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Contains two child arrays, run_ends and values. -/// The run_ends child array must be a 16/32/64-bit integer array -/// which encodes the indices at which the run with the value in -/// each corresponding index in the values child array ends. -/// Like list/struct types, the value array can be of any type. +// / Contains two child arrays, run_ends and values. +// / The run_ends child array must be a 16/32/64-bit integer array +// / which encodes the indices at which the run with the value in +// / each corresponding index in the values child array ends. +// / Like list/struct types, the value array can be of any type. 
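The run_ends rule above is compact but easy to misread; a hand-rolled plain-Go sketch of the decode semantics (not the arrow/array run-end-encoded implementation) makes it concrete: run_ends[i] is the exclusive logical end index of run i.

```go
package main

import "fmt"

// decodeREE expands a run-end-encoded pair of child arrays into the logical
// values: values[i] repeats from the previous run end up to runEnds[i].
func decodeREE(runEnds []int32, values []string) []string {
	out := make([]string, 0, runEnds[len(runEnds)-1])
	start := int32(0)
	for i, end := range runEnds {
		for j := start; j < end; j++ {
			out = append(out, values[i])
		}
		start = end
	}
	return out
}

func main() {
	fmt.Println(decodeREE([]int32{3, 5}, []string{"a", "b"})) // [a a a b b]
}
```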
type RunEndEncoded struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Schema.go b/go/arrow/internal/flatbuf/Schema.go index 4ee5ecc9e5e40..ae5b248a766e3 100644 --- a/go/arrow/internal/flatbuf/Schema.go +++ b/go/arrow/internal/flatbuf/Schema.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// A Schema describes the columns in a row batch +// / ---------------------------------------------------------------------- +// / A Schema describes the columns in a row batch type Schema struct { _tab flatbuffers.Table } @@ -44,9 +44,9 @@ func (rcv *Schema) Table() flatbuffers.Table { return rcv._tab } -/// endianness of the buffer -/// it is Little Endian by default -/// if endianness doesn't match the underlying system then the vectors need to be converted +// / endianness of the buffer +// / it is Little Endian by default +// / if endianness doesn't match the underlying system then the vectors need to be converted func (rcv *Schema) Endianness() Endianness { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,9 +55,9 @@ func (rcv *Schema) Endianness() Endianness { return 0 } -/// endianness of the buffer -/// it is Little Endian by default -/// if endianness doesn't match the underlying system then the vectors need to be converted +// / endianness of the buffer +// / it is Little Endian by default +// / if endianness doesn't match the underlying system then the vectors need to be converted func (rcv *Schema) MutateEndianness(n Endianness) bool { return rcv._tab.MutateInt16Slot(4, int16(n)) } @@ -102,7 +102,7 @@ func (rcv *Schema) CustomMetadataLength() int { return 0 } -/// Features used in the stream/file. +// / Features used in the stream/file. func (rcv *Schema) Features(j int) Feature { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -120,7 +120,7 @@ func (rcv *Schema) FeaturesLength() int { return 0 } -/// Features used in the stream/file. +// / Features used in the stream/file. func (rcv *Schema) MutateFeatures(j int, n Feature) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { diff --git a/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go b/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go index de8217650b281..2477af100355c 100644 --- a/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go +++ b/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Compressed Sparse Row format, that is matrix-specific. +// / Compressed Sparse Row format, that is matrix-specific. type SparseMatrixIndexCSR struct { _tab flatbuffers.Table } @@ -43,7 +43,7 @@ func (rcv *SparseMatrixIndexCSR) Table() flatbuffers.Table { return rcv._tab } -/// The type of values in indptrBuffer +// / The type of values in indptrBuffer func (rcv *SparseMatrixIndexCSR) IndptrType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -57,29 +57,29 @@ func (rcv *SparseMatrixIndexCSR) IndptrType(obj *Int) *Int { return nil } -/// The type of values in indptrBuffer -/// indptrBuffer stores the location and size of indptr array that -/// represents the range of the rows. -/// The i-th row spans from indptr[i] to indptr[i+1] in the data. -/// The length of this array is 1 + (the number of rows), and the type -/// of index value is long. 
-/// -/// For example, let X be the following 6x4 matrix: -/// -/// X := [[0, 1, 2, 0], -/// [0, 0, 3, 0], -/// [0, 4, 0, 5], -/// [0, 0, 0, 0], -/// [6, 0, 7, 8], -/// [0, 9, 0, 0]]. -/// -/// The array of non-zero values in X is: -/// -/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. -/// -/// And the indptr of X is: -/// -/// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. +// / The type of values in indptrBuffer +// / indptrBuffer stores the location and size of indptr array that +// / represents the range of the rows. +// / The i-th row spans from indptr[i] to indptr[i+1] in the data. +// / The length of this array is 1 + (the number of rows), and the type +// / of index value is long. +// / +// / For example, let X be the following 6x4 matrix: +// / +// / X := [[0, 1, 2, 0], +// / [0, 0, 3, 0], +// / [0, 4, 0, 5], +// / [0, 0, 0, 0], +// / [6, 0, 7, 8], +// / [0, 9, 0, 0]]. +// / +// / The array of non-zero values in X is: +// / +// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. +// / +// / And the indptr of X is: +// / +// / indptr(X) = [0, 2, 3, 5, 5, 8, 9]. func (rcv *SparseMatrixIndexCSR) IndptrBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -93,29 +93,29 @@ func (rcv *SparseMatrixIndexCSR) IndptrBuffer(obj *Buffer) *Buffer { return nil } -/// indptrBuffer stores the location and size of indptr array that -/// represents the range of the rows. -/// The i-th row spans from indptr[i] to indptr[i+1] in the data. -/// The length of this array is 1 + (the number of rows), and the type -/// of index value is long. -/// -/// For example, let X be the following 6x4 matrix: -/// -/// X := [[0, 1, 2, 0], -/// [0, 0, 3, 0], -/// [0, 4, 0, 5], -/// [0, 0, 0, 0], -/// [6, 0, 7, 8], -/// [0, 9, 0, 0]]. -/// -/// The array of non-zero values in X is: -/// -/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. -/// -/// And the indptr of X is: -/// -/// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. -/// The type of values in indicesBuffer +// / indptrBuffer stores the location and size of indptr array that +// / represents the range of the rows. +// / The i-th row spans from indptr[i] to indptr[i+1] in the data. +// / The length of this array is 1 + (the number of rows), and the type +// / of index value is long. +// / +// / For example, let X be the following 6x4 matrix: +// / +// / X := [[0, 1, 2, 0], +// / [0, 0, 3, 0], +// / [0, 4, 0, 5], +// / [0, 0, 0, 0], +// / [6, 0, 7, 8], +// / [0, 9, 0, 0]]. +// / +// / The array of non-zero values in X is: +// / +// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. +// / +// / And the indptr of X is: +// / +// / indptr(X) = [0, 2, 3, 5, 5, 8, 9]. +// / The type of values in indicesBuffer func (rcv *SparseMatrixIndexCSR) IndicesType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -129,16 +129,16 @@ func (rcv *SparseMatrixIndexCSR) IndicesType(obj *Int) *Int { return nil } -/// The type of values in indicesBuffer -/// indicesBuffer stores the location and size of the array that -/// contains the column indices of the corresponding non-zero values. -/// The type of index value is long. 
+// / +// / For example, the indices of the above X is: +// / +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / +// / Note that the indices are sorted in lexicographical order for each row. func (rcv *SparseMatrixIndexCSR) IndicesBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -152,15 +152,15 @@ func (rcv *SparseMatrixIndexCSR) IndicesBuffer(obj *Buffer) *Buffer { return nil } -/// indicesBuffer stores the location and size of the array that -/// contains the column indices of the corresponding non-zero values. -/// The type of index value is long. -/// -/// For example, the indices of the above X is: -/// -/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. -/// -/// Note that the indices are sorted in lexicographical order for each row. +// / indicesBuffer stores the location and size of the array that +// / contains the column indices of the corresponding non-zero values. +// / The type of index value is long. +// / +// / For example, the indices of the above X is: +// / +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / +// / Note that the indices are sorted in lexicographical order for each row. func SparseMatrixIndexCSRStart(builder *flatbuffers.Builder) { builder.StartObject(4) } diff --git a/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go b/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go index c28cc5d082fac..7f262deedbfc1 100644 --- a/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go +++ b/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Compressed Sparse format, that is matrix-specific. +// / Compressed Sparse format, that is matrix-specific. type SparseMatrixIndexCSX struct { _tab flatbuffers.Table } @@ -43,7 +43,7 @@ func (rcv *SparseMatrixIndexCSX) Table() flatbuffers.Table { return rcv._tab } -/// Which axis, row or column, is compressed +// / Which axis, row or column, is compressed func (rcv *SparseMatrixIndexCSX) CompressedAxis() SparseMatrixCompressedAxis { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -52,12 +52,12 @@ func (rcv *SparseMatrixIndexCSX) CompressedAxis() SparseMatrixCompressedAxis { return 0 } -/// Which axis, row or column, is compressed +// / Which axis, row or column, is compressed func (rcv *SparseMatrixIndexCSX) MutateCompressedAxis(n SparseMatrixCompressedAxis) bool { return rcv._tab.MutateInt16Slot(4, int16(n)) } -/// The type of values in indptrBuffer +// / The type of values in indptrBuffer func (rcv *SparseMatrixIndexCSX) IndptrType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -71,30 +71,30 @@ func (rcv *SparseMatrixIndexCSX) IndptrType(obj *Int) *Int { return nil } -/// The type of values in indptrBuffer -/// indptrBuffer stores the location and size of indptr array that -/// represents the range of the rows. -/// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. -/// The length of this array is 1 + (the number of rows), and the type -/// of index value is long. -/// -/// For example, let X be the following 6x4 matrix: -/// ```text -/// X := [[0, 1, 2, 0], -/// [0, 0, 3, 0], -/// [0, 4, 0, 5], -/// [0, 0, 0, 0], -/// [6, 0, 7, 8], -/// [0, 9, 0, 0]]. -/// ``` -/// The array of non-zero values in X is: -/// ```text -/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. -/// ``` -/// And the indptr of X is: -/// ```text -/// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. 
-/// ``` +// / The type of values in indptrBuffer +// / indptrBuffer stores the location and size of indptr array that +// / represents the range of the rows. +// / The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. +// / The length of this array is 1 + (the number of rows), and the type +// / of index value is long. +// / +// / For example, let X be the following 6x4 matrix: +// / ```text +// / X := [[0, 1, 2, 0], +// / [0, 0, 3, 0], +// / [0, 4, 0, 5], +// / [0, 0, 0, 0], +// / [6, 0, 7, 8], +// / [0, 9, 0, 0]]. +// / ``` +// / The array of non-zero values in X is: +// / ```text +// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. +// / ``` +// / And the indptr of X is: +// / ```text +// / indptr(X) = [0, 2, 3, 5, 5, 8, 9]. +// / ``` func (rcv *SparseMatrixIndexCSX) IndptrBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -108,30 +108,30 @@ func (rcv *SparseMatrixIndexCSX) IndptrBuffer(obj *Buffer) *Buffer { return nil } -/// indptrBuffer stores the location and size of indptr array that -/// represents the range of the rows. -/// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. -/// The length of this array is 1 + (the number of rows), and the type -/// of index value is long. -/// -/// For example, let X be the following 6x4 matrix: -/// ```text -/// X := [[0, 1, 2, 0], -/// [0, 0, 3, 0], -/// [0, 4, 0, 5], -/// [0, 0, 0, 0], -/// [6, 0, 7, 8], -/// [0, 9, 0, 0]]. -/// ``` -/// The array of non-zero values in X is: -/// ```text -/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. -/// ``` -/// And the indptr of X is: -/// ```text -/// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. -/// ``` -/// The type of values in indicesBuffer +// / indptrBuffer stores the location and size of indptr array that +// / represents the range of the rows. +// / The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. +// / The length of this array is 1 + (the number of rows), and the type +// / of index value is long. +// / +// / For example, let X be the following 6x4 matrix: +// / ```text +// / X := [[0, 1, 2, 0], +// / [0, 0, 3, 0], +// / [0, 4, 0, 5], +// / [0, 0, 0, 0], +// / [6, 0, 7, 8], +// / [0, 9, 0, 0]]. +// / ``` +// / The array of non-zero values in X is: +// / ```text +// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. +// / ``` +// / And the indptr of X is: +// / ```text +// / indptr(X) = [0, 2, 3, 5, 5, 8, 9]. +// / ``` +// / The type of values in indicesBuffer func (rcv *SparseMatrixIndexCSX) IndicesType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -145,16 +145,16 @@ func (rcv *SparseMatrixIndexCSX) IndicesType(obj *Int) *Int { return nil } -/// The type of values in indicesBuffer -/// indicesBuffer stores the location and size of the array that -/// contains the column indices of the corresponding non-zero values. -/// The type of index value is long. -/// -/// For example, the indices of the above X is: -/// ```text -/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. -/// ``` -/// Note that the indices are sorted in lexicographical order for each row. +// / The type of values in indicesBuffer +// / indicesBuffer stores the location and size of the array that +// / contains the column indices of the corresponding non-zero values. +// / The type of index value is long. +// / +// / For example, the indices of the above X is: +// / ```text +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / ``` +// / Note that the indices are sorted in lexicographical order for each row. 
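The indptr/indices/values convention documented for CSR and CSX translates directly into a row lookup: row i's non-zeros sit at positions indptr[i] up to (but excluding) indptr[i+1]. A plain-Go sketch using the example matrix X from the comments above (helper names are illustrative, not flatbuffer accessors):

```go
package main

import "fmt"

// rowNonZeros returns column->value for row i of a CSR matrix, following the
// indptr/indices/values convention documented above.
func rowNonZeros(indptr, indices []int64, values []float64, i int) map[int64]float64 {
	row := map[int64]float64{}
	for p := indptr[i]; p < indptr[i+1]; p++ {
		row[indices[p]] = values[p]
	}
	return row
}

func main() {
	indptr := []int64{0, 2, 3, 5, 5, 8, 9}
	indices := []int64{1, 2, 2, 1, 3, 0, 2, 3, 1}
	values := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
	fmt.Println(rowNonZeros(indptr, indices, values, 2)) // map[1:4 3:5], i.e. row [0, 4, 0, 5]
}
```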
func (rcv *SparseMatrixIndexCSX) IndicesBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -168,15 +168,15 @@ func (rcv *SparseMatrixIndexCSX) IndicesBuffer(obj *Buffer) *Buffer { return nil } -/// indicesBuffer stores the location and size of the array that -/// contains the column indices of the corresponding non-zero values. -/// The type of index value is long. -/// -/// For example, the indices of the above X is: -/// ```text -/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. -/// ``` -/// Note that the indices are sorted in lexicographical order for each row. +// / indicesBuffer stores the location and size of the array that +// / contains the column indices of the corresponding non-zero values. +// / The type of index value is long. +// / +// / For example, the indices of the above X is: +// / ```text +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / ``` +// / Note that the indices are sorted in lexicographical order for each row. func SparseMatrixIndexCSXStart(builder *flatbuffers.Builder) { builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/SparseTensor.go b/go/arrow/internal/flatbuf/SparseTensor.go index 6f3f55797d755..8f67e1fc08b84 100644 --- a/go/arrow/internal/flatbuf/SparseTensor.go +++ b/go/arrow/internal/flatbuf/SparseTensor.go @@ -54,9 +54,9 @@ func (rcv *SparseTensor) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(4, byte(n)) } -/// The type of data contained in a value cell. -/// Currently only fixed-width value types are supported, -/// no strings or nested types. +// / The type of data contained in a value cell. +// / Currently only fixed-width value types are supported, +// / no strings or nested types. func (rcv *SparseTensor) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -66,10 +66,10 @@ func (rcv *SparseTensor) Type(obj *flatbuffers.Table) bool { return false } -/// The type of data contained in a value cell. -/// Currently only fixed-width value types are supported, -/// no strings or nested types. -/// The dimensions of the tensor, optionally named. +// / The type of data contained in a value cell. +// / Currently only fixed-width value types are supported, +// / no strings or nested types. +// / The dimensions of the tensor, optionally named. func (rcv *SparseTensor) Shape(obj *TensorDim, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -90,8 +90,8 @@ func (rcv *SparseTensor) ShapeLength() int { return 0 } -/// The dimensions of the tensor, optionally named. -/// The number of non-zero values in a sparse tensor. +// / The dimensions of the tensor, optionally named. +// / The number of non-zero values in a sparse tensor. func (rcv *SparseTensor) NonZeroLength() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -100,7 +100,7 @@ func (rcv *SparseTensor) NonZeroLength() int64 { return 0 } -/// The number of non-zero values in a sparse tensor. +// / The number of non-zero values in a sparse tensor. 
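Shape and nonZeroLength together say how sparse the tensor is; for a 2x3x4x5 tensor with 6 stored values (the COO example below), that is 6 of 120 cells. A trivial sketch of the arithmetic (variable names are illustrative):

```go
package main

import "fmt"

func main() {
	shape := []int64{2, 3, 4, 5} // TensorDim sizes for the example tensor X below
	var nonZeroLength int64 = 6  // SparseTensor.NonZeroLength
	cells := int64(1)
	for _, d := range shape {
		cells *= d
	}
	fmt.Printf("%d of %d cells stored\n", nonZeroLength, cells) // 6 of 120
}
```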
func (rcv *SparseTensor) MutateNonZeroLength(n int64) bool { return rcv._tab.MutateInt64Slot(10, n) } @@ -117,7 +117,7 @@ func (rcv *SparseTensor) MutateSparseIndexType(n SparseTensorIndex) bool { return rcv._tab.MutateByteSlot(12, byte(n)) } -/// Sparse tensor index +// / Sparse tensor index func (rcv *SparseTensor) SparseIndex(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(14)) if o != 0 { @@ -127,8 +127,8 @@ func (rcv *SparseTensor) SparseIndex(obj *flatbuffers.Table) bool { return false } -/// Sparse tensor index -/// The location and size of the tensor's data +// / Sparse tensor index +// / The location and size of the tensor's data func (rcv *SparseTensor) Data(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(16)) if o != 0 { @@ -142,7 +142,7 @@ func (rcv *SparseTensor) Data(obj *Buffer) *Buffer { return nil } -/// The location and size of the tensor's data +// / The location and size of the tensor's data func SparseTensorStart(builder *flatbuffers.Builder) { builder.StartObject(7) } diff --git a/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go b/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go index f8eee99fa691e..bf1c218e2e415 100644 --- a/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go +++ b/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go @@ -22,38 +22,38 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// EXPERIMENTAL: Data structures for sparse tensors -/// Coordinate (COO) format of sparse tensor index. -/// -/// COO's index list are represented as a NxM matrix, -/// where N is the number of non-zero values, -/// and M is the number of dimensions of a sparse tensor. -/// -/// indicesBuffer stores the location and size of the data of this indices -/// matrix. The value type and the stride of the indices matrix is -/// specified in indicesType and indicesStrides fields. -/// -/// For example, let X be a 2x3x4x5 tensor, and it has the following -/// 6 non-zero values: -/// ```text -/// X[0, 1, 2, 0] := 1 -/// X[1, 1, 2, 3] := 2 -/// X[0, 2, 1, 0] := 3 -/// X[0, 1, 3, 0] := 4 -/// X[0, 1, 2, 1] := 5 -/// X[1, 2, 0, 4] := 6 -/// ``` -/// In COO format, the index matrix of X is the following 4x6 matrix: -/// ```text -/// [[0, 0, 0, 0, 1, 1], -/// [1, 1, 1, 2, 1, 2], -/// [2, 2, 3, 1, 2, 0], -/// [0, 1, 0, 0, 3, 4]] -/// ``` -/// When isCanonical is true, the indices is sorted in lexicographical order -/// (row-major order), and it does not have duplicated entries. Otherwise, -/// the indices may not be sorted, or may have duplicated entries. +// / ---------------------------------------------------------------------- +// / EXPERIMENTAL: Data structures for sparse tensors +// / Coordinate (COO) format of sparse tensor index. +// / +// / COO's index list are represented as a NxM matrix, +// / where N is the number of non-zero values, +// / and M is the number of dimensions of a sparse tensor. +// / +// / indicesBuffer stores the location and size of the data of this indices +// / matrix. The value type and the stride of the indices matrix is +// / specified in indicesType and indicesStrides fields. 
+// / +// / For example, let X be a 2x3x4x5 tensor, and it has the following +// / 6 non-zero values: +// / ```text +// / X[0, 1, 2, 0] := 1 +// / X[1, 1, 2, 3] := 2 +// / X[0, 2, 1, 0] := 3 +// / X[0, 1, 3, 0] := 4 +// / X[0, 1, 2, 1] := 5 +// / X[1, 2, 0, 4] := 6 +// / ``` +// / In COO format, the index matrix of X is the following 4x6 matrix: +// / ```text +// / [[0, 0, 0, 0, 1, 1], +// / [1, 1, 1, 2, 1, 2], +// / [2, 2, 3, 1, 2, 0], +// / [0, 1, 0, 0, 3, 4]] +// / ``` +// / When isCanonical is true, the indices is sorted in lexicographical order +// / (row-major order), and it does not have duplicated entries. Otherwise, +// / the indices may not be sorted, or may have duplicated entries. type SparseTensorIndexCOO struct { _tab flatbuffers.Table } @@ -74,7 +74,7 @@ func (rcv *SparseTensorIndexCOO) Table() flatbuffers.Table { return rcv._tab } -/// The type of values in indicesBuffer +// / The type of values in indicesBuffer func (rcv *SparseTensorIndexCOO) IndicesType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -88,9 +88,9 @@ func (rcv *SparseTensorIndexCOO) IndicesType(obj *Int) *Int { return nil } -/// The type of values in indicesBuffer -/// Non-negative byte offsets to advance one value cell along each dimension -/// If omitted, default to row-major order (C-like). +// / The type of values in indicesBuffer +// / Non-negative byte offsets to advance one value cell along each dimension +// / If omitted, default to row-major order (C-like). func (rcv *SparseTensorIndexCOO) IndicesStrides(j int) int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -108,8 +108,8 @@ func (rcv *SparseTensorIndexCOO) IndicesStridesLength() int { return 0 } -/// Non-negative byte offsets to advance one value cell along each dimension -/// If omitted, default to row-major order (C-like). +// / Non-negative byte offsets to advance one value cell along each dimension +// / If omitted, default to row-major order (C-like). func (rcv *SparseTensorIndexCOO) MutateIndicesStrides(j int, n int64) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -119,7 +119,7 @@ func (rcv *SparseTensorIndexCOO) MutateIndicesStrides(j int, n int64) bool { return false } -/// The location and size of the indices matrix's data +// / The location and size of the indices matrix's data func (rcv *SparseTensorIndexCOO) IndicesBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -133,12 +133,12 @@ func (rcv *SparseTensorIndexCOO) IndicesBuffer(obj *Buffer) *Buffer { return nil } -/// The location and size of the indices matrix's data -/// This flag is true if and only if the indices matrix is sorted in -/// row-major order, and does not have duplicated entries. -/// This sort order is the same as of Tensorflow's SparseTensor, -/// but it is inverse order of SciPy's canonical coo_matrix -/// (SciPy employs column-major order for its coo_matrix). +// / The location and size of the indices matrix's data +// / This flag is true if and only if the indices matrix is sorted in +// / row-major order, and does not have duplicated entries. +// / This sort order is the same as of Tensorflow's SparseTensor, +// / but it is inverse order of SciPy's canonical coo_matrix +// / (SciPy employs column-major order for its coo_matrix). 
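The canonical ordering that `isCanonical` asserts can be reproduced with a short standalone sketch, assuming nothing beyond the example values in the comments above: one row of indices per non-zero value, sorted lexicographically (row-major). Read column-wise, the sorted rows are exactly the 4x6 matrix shown in the comment.

```go
package main

import (
	"fmt"
	"sort"
)

// cooEntry pairs one coordinate of the 2x3x4x5 example tensor with its value.
type cooEntry struct {
	index [4]int64
	value int64
}

func main() {
	entries := []cooEntry{
		{[4]int64{0, 1, 2, 0}, 1},
		{[4]int64{1, 1, 2, 3}, 2},
		{[4]int64{0, 2, 1, 0}, 3},
		{[4]int64{0, 1, 3, 0}, 4},
		{[4]int64{0, 1, 2, 1}, 5},
		{[4]int64{1, 2, 0, 4}, 6},
	}

	// Canonical (isCanonical == true) means the index rows are sorted in
	// lexicographical, i.e. row-major, order and contain no duplicates.
	sort.Slice(entries, func(i, j int) bool {
		a, b := entries[i].index, entries[j].index
		for d := range a {
			if a[d] != b[d] {
				return a[d] < b[d]
			}
		}
		return false
	})

	for _, e := range entries {
		fmt.Println(e.index, "->", e.value)
	}
	// [0 1 2 0] -> 1
	// [0 1 2 1] -> 5
	// [0 1 3 0] -> 4
	// [0 2 1 0] -> 3
	// [1 1 2 3] -> 2
	// [1 2 0 4] -> 6
}
```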
func (rcv *SparseTensorIndexCOO) IsCanonical() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -147,11 +147,11 @@ func (rcv *SparseTensorIndexCOO) IsCanonical() bool { return false } -/// This flag is true if and only if the indices matrix is sorted in -/// row-major order, and does not have duplicated entries. -/// This sort order is the same as of Tensorflow's SparseTensor, -/// but it is inverse order of SciPy's canonical coo_matrix -/// (SciPy employs column-major order for its coo_matrix). +// / This flag is true if and only if the indices matrix is sorted in +// / row-major order, and does not have duplicated entries. +// / This sort order is the same as of Tensorflow's SparseTensor, +// / but it is inverse order of SciPy's canonical coo_matrix +// / (SciPy employs column-major order for its coo_matrix). func (rcv *SparseTensorIndexCOO) MutateIsCanonical(n bool) bool { return rcv._tab.MutateBoolSlot(10, n) } diff --git a/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go b/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go index a824c84ebfe2e..66226e0412c21 100644 --- a/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go +++ b/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Compressed Sparse Fiber (CSF) sparse tensor index. +// / Compressed Sparse Fiber (CSF) sparse tensor index. type SparseTensorIndexCSF struct { _tab flatbuffers.Table } @@ -43,37 +43,37 @@ func (rcv *SparseTensorIndexCSF) Table() flatbuffers.Table { return rcv._tab } -/// CSF is a generalization of compressed sparse row (CSR) index. -/// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) -/// -/// CSF index recursively compresses each dimension of a tensor into a set -/// of prefix trees. Each path from a root to leaf forms one tensor -/// non-zero index. CSF is implemented with two arrays of buffers and one -/// arrays of integers. -/// -/// For example, let X be a 2x3x4x5 tensor and let it have the following -/// 8 non-zero values: -/// ```text -/// X[0, 0, 0, 1] := 1 -/// X[0, 0, 0, 2] := 2 -/// X[0, 1, 0, 0] := 3 -/// X[0, 1, 0, 2] := 4 -/// X[0, 1, 1, 0] := 5 -/// X[1, 1, 1, 0] := 6 -/// X[1, 1, 1, 1] := 7 -/// X[1, 1, 1, 2] := 8 -/// ``` -/// As a prefix tree this would be represented as: -/// ```text -/// 0 1 -/// / \ | -/// 0 1 1 -/// / / \ | -/// 0 0 1 1 -/// /| /| | /| | -/// 1 2 0 2 0 0 1 2 -/// ``` -/// The type of values in indptrBuffers +// / CSF is a generalization of compressed sparse row (CSR) index. +// / See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) +// / +// / CSF index recursively compresses each dimension of a tensor into a set +// / of prefix trees. Each path from a root to leaf forms one tensor +// / non-zero index. CSF is implemented with two arrays of buffers and one +// / arrays of integers. 
+// / +// / For example, let X be a 2x3x4x5 tensor and let it have the following +// / 8 non-zero values: +// / ```text +// / X[0, 0, 0, 1] := 1 +// / X[0, 0, 0, 2] := 2 +// / X[0, 1, 0, 0] := 3 +// / X[0, 1, 0, 2] := 4 +// / X[0, 1, 1, 0] := 5 +// / X[1, 1, 1, 0] := 6 +// / X[1, 1, 1, 1] := 7 +// / X[1, 1, 1, 2] := 8 +// / ``` +// / As a prefix tree this would be represented as: +// / ```text +// / 0 1 +// / / \ | +// / 0 1 1 +// / / / \ | +// / 0 0 1 1 +// / /| /| | /| | +// / 1 2 0 2 0 0 1 2 +// / ``` +// / The type of values in indptrBuffers func (rcv *SparseTensorIndexCSF) IndptrType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -87,51 +87,51 @@ func (rcv *SparseTensorIndexCSF) IndptrType(obj *Int) *Int { return nil } -/// CSF is a generalization of compressed sparse row (CSR) index. -/// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) -/// -/// CSF index recursively compresses each dimension of a tensor into a set -/// of prefix trees. Each path from a root to leaf forms one tensor -/// non-zero index. CSF is implemented with two arrays of buffers and one -/// arrays of integers. -/// -/// For example, let X be a 2x3x4x5 tensor and let it have the following -/// 8 non-zero values: -/// ```text -/// X[0, 0, 0, 1] := 1 -/// X[0, 0, 0, 2] := 2 -/// X[0, 1, 0, 0] := 3 -/// X[0, 1, 0, 2] := 4 -/// X[0, 1, 1, 0] := 5 -/// X[1, 1, 1, 0] := 6 -/// X[1, 1, 1, 1] := 7 -/// X[1, 1, 1, 2] := 8 -/// ``` -/// As a prefix tree this would be represented as: -/// ```text -/// 0 1 -/// / \ | -/// 0 1 1 -/// / / \ | -/// 0 0 1 1 -/// /| /| | /| | -/// 1 2 0 2 0 0 1 2 -/// ``` -/// The type of values in indptrBuffers -/// indptrBuffers stores the sparsity structure. -/// Each two consecutive dimensions in a tensor correspond to a buffer in -/// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` -/// and `indptrBuffers[dim][i + 1]` signify a range of nodes in -/// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. -/// -/// For example, the indptrBuffers for the above X is: -/// ```text -/// indptrBuffer(X) = [ -/// [0, 2, 3], -/// [0, 1, 3, 4], -/// [0, 2, 4, 5, 8] -/// ]. -/// ``` +// / CSF is a generalization of compressed sparse row (CSR) index. +// / See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) +// / +// / CSF index recursively compresses each dimension of a tensor into a set +// / of prefix trees. Each path from a root to leaf forms one tensor +// / non-zero index. CSF is implemented with two arrays of buffers and one +// / arrays of integers. +// / +// / For example, let X be a 2x3x4x5 tensor and let it have the following +// / 8 non-zero values: +// / ```text +// / X[0, 0, 0, 1] := 1 +// / X[0, 0, 0, 2] := 2 +// / X[0, 1, 0, 0] := 3 +// / X[0, 1, 0, 2] := 4 +// / X[0, 1, 1, 0] := 5 +// / X[1, 1, 1, 0] := 6 +// / X[1, 1, 1, 1] := 7 +// / X[1, 1, 1, 2] := 8 +// / ``` +// / As a prefix tree this would be represented as: +// / ```text +// / 0 1 +// / / \ | +// / 0 1 1 +// / / / \ | +// / 0 0 1 1 +// / /| /| | /| | +// / 1 2 0 2 0 0 1 2 +// / ``` +// / The type of values in indptrBuffers +// / indptrBuffers stores the sparsity structure. +// / Each two consecutive dimensions in a tensor correspond to a buffer in +// / indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` +// / and `indptrBuffers[dim][i + 1]` signify a range of nodes in +// / `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. 
+// / +// / For example, the indptrBuffers for the above X is: +// / ```text +// / indptrBuffer(X) = [ +// / [0, 2, 3], +// / [0, 1, 3, 4], +// / [0, 2, 4, 5, 8] +// / ]. +// / ``` func (rcv *SparseTensorIndexCSF) IndptrBuffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -151,21 +151,21 @@ func (rcv *SparseTensorIndexCSF) IndptrBuffersLength() int { return 0 } -/// indptrBuffers stores the sparsity structure. -/// Each two consecutive dimensions in a tensor correspond to a buffer in -/// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` -/// and `indptrBuffers[dim][i + 1]` signify a range of nodes in -/// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. -/// -/// For example, the indptrBuffers for the above X is: -/// ```text -/// indptrBuffer(X) = [ -/// [0, 2, 3], -/// [0, 1, 3, 4], -/// [0, 2, 4, 5, 8] -/// ]. -/// ``` -/// The type of values in indicesBuffers +// / indptrBuffers stores the sparsity structure. +// / Each two consecutive dimensions in a tensor correspond to a buffer in +// / indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` +// / and `indptrBuffers[dim][i + 1]` signify a range of nodes in +// / `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. +// / +// / For example, the indptrBuffers for the above X is: +// / ```text +// / indptrBuffer(X) = [ +// / [0, 2, 3], +// / [0, 1, 3, 4], +// / [0, 2, 4, 5, 8] +// / ]. +// / ``` +// / The type of values in indicesBuffers func (rcv *SparseTensorIndexCSF) IndicesType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -179,18 +179,18 @@ func (rcv *SparseTensorIndexCSF) IndicesType(obj *Int) *Int { return nil } -/// The type of values in indicesBuffers -/// indicesBuffers stores values of nodes. -/// Each tensor dimension corresponds to a buffer in indicesBuffers. -/// For example, the indicesBuffers for the above X is: -/// ```text -/// indicesBuffer(X) = [ -/// [0, 1], -/// [0, 1, 1], -/// [0, 0, 1, 1], -/// [1, 2, 0, 2, 0, 0, 1, 2] -/// ]. -/// ``` +// / The type of values in indicesBuffers +// / indicesBuffers stores values of nodes. +// / Each tensor dimension corresponds to a buffer in indicesBuffers. +// / For example, the indicesBuffers for the above X is: +// / ```text +// / indicesBuffer(X) = [ +// / [0, 1], +// / [0, 1, 1], +// / [0, 0, 1, 1], +// / [1, 2, 0, 2, 0, 0, 1, 2] +// / ]. +// / ``` func (rcv *SparseTensorIndexCSF) IndicesBuffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -210,23 +210,23 @@ func (rcv *SparseTensorIndexCSF) IndicesBuffersLength() int { return 0 } -/// indicesBuffers stores values of nodes. -/// Each tensor dimension corresponds to a buffer in indicesBuffers. -/// For example, the indicesBuffers for the above X is: -/// ```text -/// indicesBuffer(X) = [ -/// [0, 1], -/// [0, 1, 1], -/// [0, 0, 1, 1], -/// [1, 2, 0, 2, 0, 0, 1, 2] -/// ]. -/// ``` -/// axisOrder stores the sequence in which dimensions were traversed to -/// produce the prefix tree. -/// For example, the axisOrder for the above X is: -/// ```text -/// axisOrder(X) = [0, 1, 2, 3]. -/// ``` +// / indicesBuffers stores values of nodes. +// / Each tensor dimension corresponds to a buffer in indicesBuffers. +// / For example, the indicesBuffers for the above X is: +// / ```text +// / indicesBuffer(X) = [ +// / [0, 1], +// / [0, 1, 1], +// / [0, 0, 1, 1], +// / [1, 2, 0, 2, 0, 0, 1, 2] +// / ]. 
+// / ``` +// / axisOrder stores the sequence in which dimensions were traversed to +// / produce the prefix tree. +// / For example, the axisOrder for the above X is: +// / ```text +// / axisOrder(X) = [0, 1, 2, 3]. +// / ``` func (rcv *SparseTensorIndexCSF) AxisOrder(j int) int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -244,12 +244,12 @@ func (rcv *SparseTensorIndexCSF) AxisOrderLength() int { return 0 } -/// axisOrder stores the sequence in which dimensions were traversed to -/// produce the prefix tree. -/// For example, the axisOrder for the above X is: -/// ```text -/// axisOrder(X) = [0, 1, 2, 3]. -/// ``` +// / axisOrder stores the sequence in which dimensions were traversed to +// / produce the prefix tree. +// / For example, the axisOrder for the above X is: +// / ```text +// / axisOrder(X) = [0, 1, 2, 3]. +// / ``` func (rcv *SparseTensorIndexCSF) MutateAxisOrder(j int, n int32) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { diff --git a/go/arrow/internal/flatbuf/Struct_.go b/go/arrow/internal/flatbuf/Struct_.go index 427e7060382af..73752a17e00fa 100644 --- a/go/arrow/internal/flatbuf/Struct_.go +++ b/go/arrow/internal/flatbuf/Struct_.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct -/// (according to the physical memory layout). We used Struct_ here as -/// Struct is a reserved word in Flatbuffers +// / A Struct_ in the flatbuffer metadata is the same as an Arrow Struct +// / (according to the physical memory layout). We used Struct_ here as +// / Struct is a reserved word in Flatbuffers type Struct_ struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Tensor.go b/go/arrow/internal/flatbuf/Tensor.go index 39d70e351e3d6..47bfe8067b57b 100644 --- a/go/arrow/internal/flatbuf/Tensor.go +++ b/go/arrow/internal/flatbuf/Tensor.go @@ -54,8 +54,8 @@ func (rcv *Tensor) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(4, byte(n)) } -/// The type of data contained in a value cell. Currently only fixed-width -/// value types are supported, no strings or nested types +// / The type of data contained in a value cell. Currently only fixed-width +// / value types are supported, no strings or nested types func (rcv *Tensor) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -65,9 +65,9 @@ func (rcv *Tensor) Type(obj *flatbuffers.Table) bool { return false } -/// The type of data contained in a value cell. Currently only fixed-width -/// value types are supported, no strings or nested types -/// The dimensions of the tensor, optionally named +// / The type of data contained in a value cell. Currently only fixed-width +// / value types are supported, no strings or nested types +// / The dimensions of the tensor, optionally named func (rcv *Tensor) Shape(obj *TensorDim, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -88,9 +88,9 @@ func (rcv *Tensor) ShapeLength() int { return 0 } -/// The dimensions of the tensor, optionally named -/// Non-negative byte offsets to advance one value cell along each dimension -/// If omitted, default to row-major order (C-like). +// / The dimensions of the tensor, optionally named +// / Non-negative byte offsets to advance one value cell along each dimension +// / If omitted, default to row-major order (C-like). 
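As a quick illustration of the default stride rule described above, this standalone sketch (not part of the diff) computes the row-major, C-like byte strides that apply when the `strides` field is omitted, given a shape and a fixed value-cell width:

```go
package main

import "fmt"

// rowMajorStrides computes the default (C-like) byte strides for a tensor
// shape: the innermost dimension advances by one value cell, and each outer
// dimension advances by the full extent of the dimensions inside it.
func rowMajorStrides(shape []int64, byteWidth int64) []int64 {
	strides := make([]int64, len(shape))
	stride := byteWidth
	for i := len(shape) - 1; i >= 0; i-- {
		strides[i] = stride
		stride *= shape[i]
	}
	return strides
}

func main() {
	// A 2x3x4 tensor of 8-byte (e.g. float64) value cells.
	fmt.Println(rowMajorStrides([]int64{2, 3, 4}, 8)) // [96 32 8]
}
```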
func (rcv *Tensor) Strides(j int) int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -108,8 +108,8 @@ func (rcv *Tensor) StridesLength() int { return 0 } -/// Non-negative byte offsets to advance one value cell along each dimension -/// If omitted, default to row-major order (C-like). +// / Non-negative byte offsets to advance one value cell along each dimension +// / If omitted, default to row-major order (C-like). func (rcv *Tensor) MutateStrides(j int, n int64) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -119,7 +119,7 @@ func (rcv *Tensor) MutateStrides(j int, n int64) bool { return false } -/// The location and size of the tensor's data +// / The location and size of the tensor's data func (rcv *Tensor) Data(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -133,7 +133,7 @@ func (rcv *Tensor) Data(obj *Buffer) *Buffer { return nil } -/// The location and size of the tensor's data +// / The location and size of the tensor's data func TensorStart(builder *flatbuffers.Builder) { builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/TensorDim.go b/go/arrow/internal/flatbuf/TensorDim.go index 14b82120887e9..c6413b6a8c0bd 100644 --- a/go/arrow/internal/flatbuf/TensorDim.go +++ b/go/arrow/internal/flatbuf/TensorDim.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// Data structures for dense tensors -/// Shape data for a single axis in a tensor +// / ---------------------------------------------------------------------- +// / Data structures for dense tensors +// / Shape data for a single axis in a tensor type TensorDim struct { _tab flatbuffers.Table } @@ -45,7 +45,7 @@ func (rcv *TensorDim) Table() flatbuffers.Table { return rcv._tab } -/// Length of dimension +// / Length of dimension func (rcv *TensorDim) Size() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -54,12 +54,12 @@ func (rcv *TensorDim) Size() int64 { return 0 } -/// Length of dimension +// / Length of dimension func (rcv *TensorDim) MutateSize(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -/// Name of the dimension, optional +// / Name of the dimension, optional func (rcv *TensorDim) Name() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -68,7 +68,7 @@ func (rcv *TensorDim) Name() []byte { return nil } -/// Name of the dimension, optional +// / Name of the dimension, optional func TensorDimStart(builder *flatbuffers.Builder) { builder.StartObject(2) } diff --git a/go/arrow/internal/flatbuf/Time.go b/go/arrow/internal/flatbuf/Time.go index 2fb6e4c110e0a..13038a6e33280 100644 --- a/go/arrow/internal/flatbuf/Time.go +++ b/go/arrow/internal/flatbuf/Time.go @@ -22,20 +22,20 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Time is either a 32-bit or 64-bit signed integer type representing an -/// elapsed time since midnight, stored in either of four units: seconds, -/// milliseconds, microseconds or nanoseconds. -/// -/// The integer `bitWidth` depends on the `unit` and must be one of the following: -/// * SECOND and MILLISECOND: 32 bits -/// * MICROSECOND and NANOSECOND: 64 bits -/// -/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds -/// (exclusive), adjusted for the time unit (for example, up to 86400000 -/// exclusive for the MILLISECOND unit). -/// This definition doesn't allow for leap seconds. 
Time values from -/// measurements with leap seconds will need to be corrected when ingesting -/// into Arrow (for example by replacing the value 86400 with 86399). +// / Time is either a 32-bit or 64-bit signed integer type representing an +// / elapsed time since midnight, stored in either of four units: seconds, +// / milliseconds, microseconds or nanoseconds. +// / +// / The integer `bitWidth` depends on the `unit` and must be one of the following: +// / * SECOND and MILLISECOND: 32 bits +// / * MICROSECOND and NANOSECOND: 64 bits +// / +// / The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds +// / (exclusive), adjusted for the time unit (for example, up to 86400000 +// / exclusive for the MILLISECOND unit). +// / This definition doesn't allow for leap seconds. Time values from +// / measurements with leap seconds will need to be corrected when ingesting +// / into Arrow (for example by replacing the value 86400 with 86399). type Time struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Timestamp.go b/go/arrow/internal/flatbuf/Timestamp.go index d0058e13e6545..ce172bacdd3c3 100644 --- a/go/arrow/internal/flatbuf/Timestamp.go +++ b/go/arrow/internal/flatbuf/Timestamp.go @@ -22,111 +22,111 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Timestamp is a 64-bit signed integer representing an elapsed time since a -/// fixed epoch, stored in either of four units: seconds, milliseconds, -/// microseconds or nanoseconds, and is optionally annotated with a timezone. -/// -/// Timestamp values do not include any leap seconds (in other words, all -/// days are considered 86400 seconds long). -/// -/// Timestamps with a non-empty timezone -/// ------------------------------------ -/// -/// If a Timestamp column has a non-empty timezone value, its epoch is -/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone -/// (the Unix epoch), regardless of the Timestamp's own timezone. -/// -/// Therefore, timestamp values with a non-empty timezone correspond to -/// physical points in time together with some additional information about -/// how the data was obtained and/or how to display it (the timezone). -/// -/// For example, the timestamp value 0 with the timezone string "Europe/Paris" -/// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the -/// application may prefer to display it as "January 1st 1970, 01h00" in -/// the Europe/Paris timezone (which is the same physical point in time). -/// -/// One consequence is that timestamp values with a non-empty timezone -/// can be compared and ordered directly, since they all share the same -/// well-known point of reference (the Unix epoch). -/// -/// Timestamps with an unset / empty timezone -/// ----------------------------------------- -/// -/// If a Timestamp column has no timezone value, its epoch is -/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. -/// -/// Therefore, timestamp values without a timezone cannot be meaningfully -/// interpreted as physical points in time, but only as calendar / clock -/// indications ("wall clock time") in an unspecified timezone. -/// -/// For example, the timestamp value 0 with an empty timezone string -/// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there -/// is not enough information to interpret it as a well-defined physical -/// point in time. 
-/// -/// One consequence is that timestamp values without a timezone cannot -/// be reliably compared or ordered, since they may have different points of -/// reference. In particular, it is *not* possible to interpret an unset -/// or empty timezone as the same as "UTC". -/// -/// Conversion between timezones -/// ---------------------------- -/// -/// If a Timestamp column has a non-empty timezone, changing the timezone -/// to a different non-empty value is a metadata-only operation: -/// the timestamp values need not change as their point of reference remains -/// the same (the Unix epoch). -/// -/// However, if a Timestamp column has no timezone value, changing it to a -/// non-empty value requires to think about the desired semantics. -/// One possibility is to assume that the original timestamp values are -/// relative to the epoch of the timezone being set; timestamp values should -/// then adjusted to the Unix epoch (for example, changing the timezone from -/// empty to "Europe/Paris" would require converting the timestamp values -/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is -/// nevertheless correct). -/// -/// Guidelines for encoding data from external libraries -/// ---------------------------------------------------- -/// -/// Date & time libraries often have multiple different data types for temporal -/// data. In order to ease interoperability between different implementations the -/// Arrow project has some recommendations for encoding these types into a Timestamp -/// column. -/// -/// An "instant" represents a physical point in time that has no relevant timezone -/// (for example, astronomical data). To encode an instant, use a Timestamp with -/// the timezone string set to "UTC", and make sure the Timestamp values -/// are relative to the UTC epoch (January 1st 1970, midnight). -/// -/// A "zoned date-time" represents a physical point in time annotated with an -/// informative timezone (for example, the timezone in which the data was -/// recorded). To encode a zoned date-time, use a Timestamp with the timezone -/// string set to the name of the timezone, and make sure the Timestamp values -/// are relative to the UTC epoch (January 1st 1970, midnight). -/// -/// (There is some ambiguity between an instant and a zoned date-time with the -/// UTC timezone. Both of these are stored the same in Arrow. Typically, -/// this distinction does not matter. If it does, then an application should -/// use custom metadata or an extension type to distinguish between the two cases.) -/// -/// An "offset date-time" represents a physical point in time combined with an -/// explicit offset from UTC. To encode an offset date-time, use a Timestamp -/// with the timezone string set to the numeric timezone offset string -/// (e.g. "+03:00"), and make sure the Timestamp values are relative to -/// the UTC epoch (January 1st 1970, midnight). -/// -/// A "naive date-time" (also called "local date-time" in some libraries) -/// represents a wall clock time combined with a calendar date, but with -/// no indication of how to map this information to a physical point in time. -/// Naive date-times must be handled with care because of this missing -/// information, and also because daylight saving time (DST) may make -/// some values ambiguous or nonexistent. A naive date-time may be -/// stored as a struct with Date and Time fields. However, it may also be -/// encoded into a Timestamp column with an empty timezone. 
The timestamp -/// values should be computed "as if" the timezone of the date-time values -/// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would -/// be encoded as timestamp value 0. +// / Timestamp is a 64-bit signed integer representing an elapsed time since a +// / fixed epoch, stored in either of four units: seconds, milliseconds, +// / microseconds or nanoseconds, and is optionally annotated with a timezone. +// / +// / Timestamp values do not include any leap seconds (in other words, all +// / days are considered 86400 seconds long). +// / +// / Timestamps with a non-empty timezone +// / ------------------------------------ +// / +// / If a Timestamp column has a non-empty timezone value, its epoch is +// / 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone +// / (the Unix epoch), regardless of the Timestamp's own timezone. +// / +// / Therefore, timestamp values with a non-empty timezone correspond to +// / physical points in time together with some additional information about +// / how the data was obtained and/or how to display it (the timezone). +// / +// / For example, the timestamp value 0 with the timezone string "Europe/Paris" +// / corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the +// / application may prefer to display it as "January 1st 1970, 01h00" in +// / the Europe/Paris timezone (which is the same physical point in time). +// / +// / One consequence is that timestamp values with a non-empty timezone +// / can be compared and ordered directly, since they all share the same +// / well-known point of reference (the Unix epoch). +// / +// / Timestamps with an unset / empty timezone +// / ----------------------------------------- +// / +// / If a Timestamp column has no timezone value, its epoch is +// / 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. +// / +// / Therefore, timestamp values without a timezone cannot be meaningfully +// / interpreted as physical points in time, but only as calendar / clock +// / indications ("wall clock time") in an unspecified timezone. +// / +// / For example, the timestamp value 0 with an empty timezone string +// / corresponds to "January 1st 1970, 00h00" in an unknown timezone: there +// / is not enough information to interpret it as a well-defined physical +// / point in time. +// / +// / One consequence is that timestamp values without a timezone cannot +// / be reliably compared or ordered, since they may have different points of +// / reference. In particular, it is *not* possible to interpret an unset +// / or empty timezone as the same as "UTC". +// / +// / Conversion between timezones +// / ---------------------------- +// / +// / If a Timestamp column has a non-empty timezone, changing the timezone +// / to a different non-empty value is a metadata-only operation: +// / the timestamp values need not change as their point of reference remains +// / the same (the Unix epoch). +// / +// / However, if a Timestamp column has no timezone value, changing it to a +// / non-empty value requires to think about the desired semantics. +// / One possibility is to assume that the original timestamp values are +// / relative to the epoch of the timezone being set; timestamp values should +// / then adjusted to the Unix epoch (for example, changing the timezone from +// / empty to "Europe/Paris" would require converting the timestamp values +// / from "Europe/Paris" to "UTC", which seems counter-intuitive but is +// / nevertheless correct). 
+// / +// / Guidelines for encoding data from external libraries +// / ---------------------------------------------------- +// / +// / Date & time libraries often have multiple different data types for temporal +// / data. In order to ease interoperability between different implementations the +// / Arrow project has some recommendations for encoding these types into a Timestamp +// / column. +// / +// / An "instant" represents a physical point in time that has no relevant timezone +// / (for example, astronomical data). To encode an instant, use a Timestamp with +// / the timezone string set to "UTC", and make sure the Timestamp values +// / are relative to the UTC epoch (January 1st 1970, midnight). +// / +// / A "zoned date-time" represents a physical point in time annotated with an +// / informative timezone (for example, the timezone in which the data was +// / recorded). To encode a zoned date-time, use a Timestamp with the timezone +// / string set to the name of the timezone, and make sure the Timestamp values +// / are relative to the UTC epoch (January 1st 1970, midnight). +// / +// / (There is some ambiguity between an instant and a zoned date-time with the +// / UTC timezone. Both of these are stored the same in Arrow. Typically, +// / this distinction does not matter. If it does, then an application should +// / use custom metadata or an extension type to distinguish between the two cases.) +// / +// / An "offset date-time" represents a physical point in time combined with an +// / explicit offset from UTC. To encode an offset date-time, use a Timestamp +// / with the timezone string set to the numeric timezone offset string +// / (e.g. "+03:00"), and make sure the Timestamp values are relative to +// / the UTC epoch (January 1st 1970, midnight). +// / +// / A "naive date-time" (also called "local date-time" in some libraries) +// / represents a wall clock time combined with a calendar date, but with +// / no indication of how to map this information to a physical point in time. +// / Naive date-times must be handled with care because of this missing +// / information, and also because daylight saving time (DST) may make +// / some values ambiguous or nonexistent. A naive date-time may be +// / stored as a struct with Date and Time fields. However, it may also be +// / encoded into a Timestamp column with an empty timezone. The timestamp +// / values should be computed "as if" the timezone of the date-time values +// / was UTC; for example, the naive date-time "January 1st 1970, 00h00" would +// / be encoded as timestamp value 0. type Timestamp struct { _tab flatbuffers.Table } @@ -159,16 +159,16 @@ func (rcv *Timestamp) MutateUnit(n TimeUnit) bool { return rcv._tab.MutateInt16Slot(4, int16(n)) } -/// The timezone is an optional string indicating the name of a timezone, -/// one of: -/// -/// * As used in the Olson timezone database (the "tz database" or -/// "tzdata"), such as "America/New_York". -/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", -/// such as "+07:30". -/// -/// Whether a timezone string is present indicates different semantics about -/// the data (see above). +// / The timezone is an optional string indicating the name of a timezone, +// / one of: +// / +// / * As used in the Olson timezone database (the "tz database" or +// / "tzdata"), such as "America/New_York". +// / * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", +// / such as "+07:30". 
+// / +// / Whether a timezone string is present indicates different semantics about +// / the data (see above). func (rcv *Timestamp) Timezone() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -177,16 +177,16 @@ func (rcv *Timestamp) Timezone() []byte { return nil } -/// The timezone is an optional string indicating the name of a timezone, -/// one of: -/// -/// * As used in the Olson timezone database (the "tz database" or -/// "tzdata"), such as "America/New_York". -/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", -/// such as "+07:30". -/// -/// Whether a timezone string is present indicates different semantics about -/// the data (see above). +// / The timezone is an optional string indicating the name of a timezone, +// / one of: +// / +// / * As used in the Olson timezone database (the "tz database" or +// / "tzdata"), such as "America/New_York". +// / * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", +// / such as "+07:30". +// / +// / Whether a timezone string is present indicates different semantics about +// / the data (see above). func TimestampStart(builder *flatbuffers.Builder) { builder.StartObject(2) } diff --git a/go/arrow/internal/flatbuf/Type.go b/go/arrow/internal/flatbuf/Type.go index ab2bce9c63657..df8ba8650e1cd 100644 --- a/go/arrow/internal/flatbuf/Type.go +++ b/go/arrow/internal/flatbuf/Type.go @@ -20,9 +20,9 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// Top-level Type value, enabling extensible type-specific metadata. We can -/// add new logical types to Type without breaking backwards compatibility +// / ---------------------------------------------------------------------- +// / Top-level Type value, enabling extensible type-specific metadata. 
We can +// / add new logical types to Type without breaking backwards compatibility type Type byte const ( diff --git a/go/arrow/internal/flatbuf/Union.go b/go/arrow/internal/flatbuf/Union.go index e34121d4757f2..0367fb3c1fb94 100644 --- a/go/arrow/internal/flatbuf/Union.go +++ b/go/arrow/internal/flatbuf/Union.go @@ -22,10 +22,10 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A union is a complex type with children in Field -/// By default ids in the type vector refer to the offsets in the children -/// optionally typeIds provides an indirection between the child offset and the type id -/// for each child `typeIds[offset]` is the id used in the type vector +// / A union is a complex type with children in Field +// / By default ids in the type vector refer to the offsets in the children +// / optionally typeIds provides an indirection between the child offset and the type id +// / for each child `typeIds[offset]` is the id used in the type vector type Union struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Utf8.go b/go/arrow/internal/flatbuf/Utf8.go index 4ff365a37504a..cab4ce7743ca9 100644 --- a/go/arrow/internal/flatbuf/Utf8.go +++ b/go/arrow/internal/flatbuf/Utf8.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Unicode with UTF-8 encoding +// / Unicode with UTF-8 encoding type Utf8 struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Utf8View.go b/go/arrow/internal/flatbuf/Utf8View.go index 9cf821490198f..f294126a618b6 100644 --- a/go/arrow/internal/flatbuf/Utf8View.go +++ b/go/arrow/internal/flatbuf/Utf8View.go @@ -22,13 +22,13 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Logically the same as Utf8, but the internal representation uses a view -/// struct that contains the string length and either the string's entire data -/// inline (for small strings) or an inlined prefix, an index of another buffer, -/// and an offset pointing to a slice in that buffer (for non-small strings). -/// -/// Since it uses a variable number of data buffers, each Field with this type -/// must have a corresponding entry in `variadicBufferCounts`. +// / Logically the same as Utf8, but the internal representation uses a view +// / struct that contains the string length and either the string's entire data +// / inline (for small strings) or an inlined prefix, an index of another buffer, +// / and an offset pointing to a slice in that buffer (for non-small strings). +// / +// / Since it uses a variable number of data buffers, each Field with this type +// / must have a corresponding entry in `variadicBufferCounts`. 
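The view layout described for `Utf8View` can be sketched roughly as follows. This is an illustration only: the struct and field names are hypothetical and are not the representation used by this module's generated code. Per the Arrow columnar format, each view is 16 bytes; strings of at most 12 bytes are stored entirely inline after the 4-byte length, while longer strings (the case modeled here) keep a 4-byte prefix plus the index of a variadic data buffer and a byte offset into it.

```go
package main

import "fmt"

// stringView sketches the 16-byte view for the non-inline ("long string")
// case. Field names are illustrative, not this module's actual API.
type stringView struct {
	length    int32   // total string length in bytes
	prefix    [4]byte // first four bytes; many comparisons resolve on these
	bufferIdx int32   // which variadic data buffer holds the full string
	offset    int32   // byte offset of the string within that buffer
}

// newView assumes the full string bytes already live at the given offset in
// buffer bufferIdx; only the prefix is copied into the view itself.
func newView(s string, bufferIdx, offset int32) stringView {
	v := stringView{length: int32(len(s)), bufferIdx: bufferIdx, offset: offset}
	copy(v.prefix[:], s)
	return v
}

func main() {
	v := newView("supercalifragilistic", 0, 128)
	fmt.Printf("len=%d prefix=%q buffer=%d offset=%d\n",
		v.length, string(v.prefix[:]), v.bufferIdx, v.offset)
	// len=20 prefix="supe" buffer=0 offset=128
}
```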
type Utf8View struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flight_integration/scenario.go b/go/arrow/internal/flight_integration/scenario.go index cde0fff522ec5..ccfc7a0ed45a3 100644 --- a/go/arrow/internal/flight_integration/scenario.go +++ b/go/arrow/internal/flight_integration/scenario.go @@ -2134,7 +2134,7 @@ func (m *flightSqlScenarioTester) ClosePreparedStatement(_ context.Context, requ return nil } -func (m *flightSqlScenarioTester) DoPutPreparedStatementQuery(_ context.Context, cmd flightsql.PreparedStatementQuery, rdr flight.MessageReader, _ flight.MetadataWriter) ([]byte, error){ +func (m *flightSqlScenarioTester) DoPutPreparedStatementQuery(_ context.Context, cmd flightsql.PreparedStatementQuery, rdr flight.MessageReader, _ flight.MetadataWriter) ([]byte, error) { switch string(cmd.GetPreparedStatementHandle()) { case "SELECT PREPARED STATEMENT HANDLE", "SELECT PREPARED STATEMENT WITH TXN HANDLE", diff --git a/go/arrow/ipc/cmd/arrow-cat/main.go b/go/arrow/ipc/cmd/arrow-cat/main.go index 080401e56a83e..4faaabb05ddc1 100644 --- a/go/arrow/ipc/cmd/arrow-cat/main.go +++ b/go/arrow/ipc/cmd/arrow-cat/main.go @@ -18,40 +18,40 @@ // // Examples: // -// $> arrow-cat ./testdata/primitives.data -// version: V4 -// record 1/3... -// col[0] "bools": [true (null) (null) false true] -// col[1] "int8s": [-1 (null) (null) -4 -5] -// col[2] "int16s": [-1 (null) (null) -4 -5] -// col[3] "int32s": [-1 (null) (null) -4 -5] -// col[4] "int64s": [-1 (null) (null) -4 -5] -// col[5] "uint8s": [1 (null) (null) 4 5] -// col[6] "uint16s": [1 (null) (null) 4 5] -// col[7] "uint32s": [1 (null) (null) 4 5] -// col[8] "uint64s": [1 (null) (null) 4 5] -// col[9] "float32s": [1 (null) (null) 4 5] -// col[10] "float64s": [1 (null) (null) 4 5] -// record 2/3... -// col[0] "bools": [true (null) (null) false true] -// [...] +// $> arrow-cat ./testdata/primitives.data +// version: V4 +// record 1/3... +// col[0] "bools": [true (null) (null) false true] +// col[1] "int8s": [-1 (null) (null) -4 -5] +// col[2] "int16s": [-1 (null) (null) -4 -5] +// col[3] "int32s": [-1 (null) (null) -4 -5] +// col[4] "int64s": [-1 (null) (null) -4 -5] +// col[5] "uint8s": [1 (null) (null) 4 5] +// col[6] "uint16s": [1 (null) (null) 4 5] +// col[7] "uint32s": [1 (null) (null) 4 5] +// col[8] "uint64s": [1 (null) (null) 4 5] +// col[9] "float32s": [1 (null) (null) 4 5] +// col[10] "float64s": [1 (null) (null) 4 5] +// record 2/3... +// col[0] "bools": [true (null) (null) false true] +// [...] // -// $> gen-arrow-stream | arrow-cat -// record 1... -// col[0] "bools": [true (null) (null) false true] -// col[1] "int8s": [-1 (null) (null) -4 -5] -// col[2] "int16s": [-1 (null) (null) -4 -5] -// col[3] "int32s": [-1 (null) (null) -4 -5] -// col[4] "int64s": [-1 (null) (null) -4 -5] -// col[5] "uint8s": [1 (null) (null) 4 5] -// col[6] "uint16s": [1 (null) (null) 4 5] -// col[7] "uint32s": [1 (null) (null) 4 5] -// col[8] "uint64s": [1 (null) (null) 4 5] -// col[9] "float32s": [1 (null) (null) 4 5] -// col[10] "float64s": [1 (null) (null) 4 5] -// record 2... -// col[0] "bools": [true (null) (null) false true] -// [...] +// $> gen-arrow-stream | arrow-cat +// record 1... 
+// col[0] "bools": [true (null) (null) false true] +// col[1] "int8s": [-1 (null) (null) -4 -5] +// col[2] "int16s": [-1 (null) (null) -4 -5] +// col[3] "int32s": [-1 (null) (null) -4 -5] +// col[4] "int64s": [-1 (null) (null) -4 -5] +// col[5] "uint8s": [1 (null) (null) 4 5] +// col[6] "uint16s": [1 (null) (null) 4 5] +// col[7] "uint32s": [1 (null) (null) 4 5] +// col[8] "uint64s": [1 (null) (null) 4 5] +// col[9] "float32s": [1 (null) (null) 4 5] +// col[10] "float64s": [1 (null) (null) 4 5] +// record 2... +// col[0] "bools": [true (null) (null) false true] +// [...] package main import ( diff --git a/go/arrow/ipc/cmd/arrow-ls/main.go b/go/arrow/ipc/cmd/arrow-ls/main.go index 2be1d076e45f0..2f54744c4068d 100644 --- a/go/arrow/ipc/cmd/arrow-ls/main.go +++ b/go/arrow/ipc/cmd/arrow-ls/main.go @@ -18,38 +18,38 @@ // // Examples: // -// $> arrow-ls ./testdata/primitives.data -// version: V4 -// schema: -// fields: 11 -// - bools: type=bool, nullable -// - int8s: type=int8, nullable -// - int16s: type=int16, nullable -// - int32s: type=int32, nullable -// - int64s: type=int64, nullable -// - uint8s: type=uint8, nullable -// - uint16s: type=uint16, nullable -// - uint32s: type=uint32, nullable -// - uint64s: type=uint64, nullable -// - float32s: type=float32, nullable -// - float64s: type=float64, nullable -// records: 3 +// $> arrow-ls ./testdata/primitives.data +// version: V4 +// schema: +// fields: 11 +// - bools: type=bool, nullable +// - int8s: type=int8, nullable +// - int16s: type=int16, nullable +// - int32s: type=int32, nullable +// - int64s: type=int64, nullable +// - uint8s: type=uint8, nullable +// - uint16s: type=uint16, nullable +// - uint32s: type=uint32, nullable +// - uint64s: type=uint64, nullable +// - float32s: type=float32, nullable +// - float64s: type=float64, nullable +// records: 3 // -// $> gen-arrow-stream | arrow-ls -// schema: -// fields: 11 -// - bools: type=bool, nullable -// - int8s: type=int8, nullable -// - int16s: type=int16, nullable -// - int32s: type=int32, nullable -// - int64s: type=int64, nullable -// - uint8s: type=uint8, nullable -// - uint16s: type=uint16, nullable -// - uint32s: type=uint32, nullable -// - uint64s: type=uint64, nullable -// - float32s: type=float32, nullable -// - float64s: type=float64, nullable -// records: 3 +// $> gen-arrow-stream | arrow-ls +// schema: +// fields: 11 +// - bools: type=bool, nullable +// - int8s: type=int8, nullable +// - int16s: type=int16, nullable +// - int32s: type=int32, nullable +// - int64s: type=int64, nullable +// - uint8s: type=uint8, nullable +// - uint16s: type=uint16, nullable +// - uint32s: type=uint32, nullable +// - uint64s: type=uint64, nullable +// - float32s: type=float32, nullable +// - float64s: type=float64, nullable +// records: 3 package main import ( diff --git a/go/arrow/math/math_amd64.go b/go/arrow/math/math_amd64.go index 44301dc2415a5..2397eef718df9 100644 --- a/go/arrow/math/math_amd64.go +++ b/go/arrow/math/math_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/math_arm64.go b/go/arrow/math/math_arm64.go index 014664b046308..b150eb061f9f5 100644 --- a/go/arrow/math/math_arm64.go +++ b/go/arrow/math/math_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build !noasm // +build !noasm package math @@ -25,8 +26,8 @@ import ( func init() { if cpu.ARM64.HasASIMD { initNEON() - } else { - initGo() + } else { + initGo() } } diff --git a/go/arrow/math/math_noasm.go b/go/arrow/math/math_noasm.go index 0fa924d90aa88..5527ebf801891 100644 --- a/go/arrow/math/math_noasm.go +++ b/go/arrow/math/math_noasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build noasm // +build noasm package math diff --git a/go/arrow/math/math_ppc64le.go b/go/arrow/math/math_ppc64le.go index 3daeac7efaff8..85c8f2fe2e758 100644 --- a/go/arrow/math/math_ppc64le.go +++ b/go/arrow/math/math_ppc64le.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/math_s390x.go b/go/arrow/math/math_s390x.go index 3daeac7efaff8..85c8f2fe2e758 100644 --- a/go/arrow/math/math_s390x.go +++ b/go/arrow/math/math_s390x.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/memory/cgo_allocator.go b/go/arrow/memory/cgo_allocator.go index ffc6b2cb88050..5eb66ade9d861 100644 --- a/go/arrow/memory/cgo_allocator.go +++ b/go/arrow/memory/cgo_allocator.go @@ -14,8 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc +//go:build cgo && ccalloc +// +build cgo,ccalloc package memory diff --git a/go/arrow/memory/cgo_allocator_defaults.go b/go/arrow/memory/cgo_allocator_defaults.go index 501431a0e1eb2..0a2e9a342d37c 100644 --- a/go/arrow/memory/cgo_allocator_defaults.go +++ b/go/arrow/memory/cgo_allocator_defaults.go @@ -14,9 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc -// +build !cclog +//go:build cgo && ccalloc && !cclog +// +build cgo,ccalloc,!cclog package memory diff --git a/go/arrow/memory/cgo_allocator_logging.go b/go/arrow/memory/cgo_allocator_logging.go index 01ad6b394807d..fe2e3a940ce21 100644 --- a/go/arrow/memory/cgo_allocator_logging.go +++ b/go/arrow/memory/cgo_allocator_logging.go @@ -14,9 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc -// +build cclog +//go:build cgo && ccalloc && cclog +// +build cgo,ccalloc,cclog package memory diff --git a/go/arrow/memory/cgo_allocator_test.go b/go/arrow/memory/cgo_allocator_test.go index e7a03767fc89a..4c07cc326c87f 100644 --- a/go/arrow/memory/cgo_allocator_test.go +++ b/go/arrow/memory/cgo_allocator_test.go @@ -14,8 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc +//go:build cgo && ccalloc +// +build cgo,ccalloc package memory diff --git a/go/arrow/memory/memory_amd64.go b/go/arrow/memory/memory_amd64.go index 58356d6482558..895ddc07cf81f 100644 --- a/go/arrow/memory/memory_amd64.go +++ b/go/arrow/memory/memory_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_arm64.go b/go/arrow/memory/memory_arm64.go index 3db5d11013164..5260334958526 100755 --- a/go/arrow/memory/memory_arm64.go +++ b/go/arrow/memory/memory_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_avx2_amd64.go b/go/arrow/memory/memory_avx2_amd64.go index 2bd851ea53275..39fb3a5f7692f 100644 --- a/go/arrow/memory/memory_avx2_amd64.go +++ b/go/arrow/memory/memory_avx2_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_js_wasm.go b/go/arrow/memory/memory_js_wasm.go index 9b94d99ff33ca..5cc0c84d39ee7 100644 --- a/go/arrow/memory/memory_js_wasm.go +++ b/go/arrow/memory/memory_js_wasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build wasm // +build wasm package memory diff --git a/go/arrow/memory/memory_neon_arm64.go b/go/arrow/memory/memory_neon_arm64.go index 6cb0400c9c597..806ca575f22dd 100755 --- a/go/arrow/memory/memory_neon_arm64.go +++ b/go/arrow/memory/memory_neon_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_noasm.go b/go/arrow/memory/memory_noasm.go index bf8846fa2e059..44f19c091c7e0 100644 --- a/go/arrow/memory/memory_noasm.go +++ b/go/arrow/memory/memory_noasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build noasm // +build noasm package memory diff --git a/go/arrow/memory/memory_sse4_amd64.go b/go/arrow/memory/memory_sse4_amd64.go index 716c0d2704a88..1711a1ee3eaf7 100644 --- a/go/arrow/memory/memory_sse4_amd64.go +++ b/go/arrow/memory/memory_sse4_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/util/messages/README.md b/go/arrow/util/messages/README.md new file mode 100644 index 0000000000000..312484f701a46 --- /dev/null +++ b/go/arrow/util/messages/README.md @@ -0,0 +1,25 @@ + + +How to generate the .pb.go files + +``` +cd go/arrow/util/ +protoc -I ./ --go_out=./messages ./messages/types.proto +``` diff --git a/go/arrow/util/messages/types.proto b/go/arrow/util/messages/types.proto new file mode 100644 index 0000000000000..c085273ca35e0 --- /dev/null +++ b/go/arrow/util/messages/types.proto @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +import "google/protobuf/any.proto"; + +option go_package = "../util_message"; + +message ExampleMessage { + string field1 = 1; +} + +message AllTheTypes { + string str = 1; + int32 int32 = 2; + int64 int64 = 3; + sint32 sint32 = 4; + sint64 sin64 = 5; + uint32 uint32 = 6; + uint64 uint64 = 7; + fixed32 fixed32 = 8; + fixed64 fixed64 = 9; + sfixed32 sfixed32 = 10; + bool bool = 11; + bytes bytes = 12; + double double = 13; + ExampleEnum enum = 14; + ExampleMessage message = 15; + oneof oneof { + string oneofstring = 16; + ExampleMessage oneofmessage = 17; + } + google.protobuf.Any any = 18; + map<string, string> simple_map = 19; + map<string, ExampleMessage> complex_map = 20; + repeated string simple_list = 21; + repeated ExampleMessage complex_list = 22; + + enum ExampleEnum { + OPTION_0 = 0; + OPTION_1 = 1; + } +} diff --git a/go/arrow/util/protobuf_reflect.go b/go/arrow/util/protobuf_reflect.go new file mode 100644 index 0000000000000..b4c8d68db8b0d --- /dev/null +++ b/go/arrow/util/protobuf_reflect.go @@ -0,0 +1,865 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +package util + +import ( + "fmt" + "reflect" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/huandu/xstrings" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/types/known/anypb" +) + +// ProtobufTypeHandler provides options on how protobuf fields should be handled in the conversion to arrow +type ProtobufTypeHandler int + +const ( + // OneOfNull means do not wrap oneOfs in a union, they are treated as separate fields + OneOfNull ProtobufTypeHandler = iota + // OneOfDenseUnion maps the protobuf OneOf to an arrow.DENSE_UNION + OneOfDenseUnion + // EnumNumber uses the Enum numeric value + EnumNumber + // EnumValue uses the Enum string value + EnumValue + // EnumDictionary uses both the numeric and string value and maps to an arrow.Dictionary + EnumDictionary +) + +type schemaOptions struct { + exclusionPolicy func(pfr *ProtobufFieldReflection) bool + fieldNameFormatter func(str string) string + oneOfHandler ProtobufTypeHandler + enumHandler ProtobufTypeHandler +} + +// ProtobufFieldReflection represents the metadata and values of a protobuf field +type ProtobufFieldReflection struct { + parent *ProtobufMessageReflection + descriptor protoreflect.FieldDescriptor + prValue protoreflect.Value + rValue reflect.Value + schemaOptions + arrow.Field +} + +func (pfr *ProtobufFieldReflection) isNull() bool { + for pfr.rValue.Kind() == reflect.Ptr { + if pfr.rValue.IsNil() { + return true + } + pfr.rValue = pfr.rValue.Elem() + } + + if !pfr.rValue.IsValid() || !pfr.prValue.IsValid() { + return true + } + return false +} + +func (pfr *ProtobufFieldReflection) arrowField() arrow.Field { + return arrow.Field{ + Name: pfr.name(), + Type: pfr.getDataType(), + Nullable: true, + } +} + +func (pfr *ProtobufFieldReflection) protoreflectValue() protoreflect.Value { + return pfr.prValue +} + +func (pfr *ProtobufFieldReflection) reflectValue() reflect.Value { + return pfr.rValue +} + +func (pfr *ProtobufFieldReflection) GetDescriptor() protoreflect.FieldDescriptor { + return pfr.descriptor +} + +func (pfr *ProtobufFieldReflection) name() string { + if pfr.isOneOf() && pfr.schemaOptions.oneOfHandler != OneOfNull { + return pfr.fieldNameFormatter(string(pfr.descriptor.ContainingOneof().Name())) + } + return pfr.fieldNameFormatter(string(pfr.descriptor.Name())) +} + +func (pfr *ProtobufFieldReflection) arrowType() arrow.Type { + if pfr.isOneOf() && pfr.schemaOptions.oneOfHandler == OneOfDenseUnion { + return arrow.DENSE_UNION + } + if pfr.isEnum() { + switch pfr.enumHandler { + case EnumNumber: + return arrow.INT32 + case EnumValue: + return arrow.STRING + case EnumDictionary: + return arrow.DICTIONARY + } + } + if pfr.isStruct() { + return arrow.STRUCT + } + if pfr.isMap() { + return arrow.MAP + } + if pfr.isList() { + return arrow.LIST + } + switch pfr.descriptor.Kind() { + case protoreflect.Int32Kind: + return arrow.INT32 + case protoreflect.Int64Kind: + return arrow.INT64 + case protoreflect.Sint32Kind: + return arrow.INT32 + case protoreflect.Sint64Kind: + return arrow.INT64 + case protoreflect.Uint32Kind: + return arrow.UINT32 + case protoreflect.Uint64Kind: + return arrow.UINT64 + case protoreflect.Fixed32Kind: + return arrow.UINT32 + case protoreflect.Fixed64Kind: + return arrow.UINT64 + case protoreflect.Sfixed32Kind: + return arrow.INT32 + case protoreflect.Sfixed64Kind: + return arrow.INT64 + case protoreflect.FloatKind: + return 
arrow.FLOAT32 + case protoreflect.DoubleKind: + return arrow.FLOAT64 + case protoreflect.StringKind: + return arrow.STRING + case protoreflect.BytesKind: + return arrow.BINARY + case protoreflect.BoolKind: + return arrow.BOOL + } + return arrow.NULL +} + +func (pfr *ProtobufFieldReflection) isOneOf() bool { + return pfr.descriptor.ContainingOneof() != nil +} + +func (pfr *ProtobufFieldReflection) isEnum() bool { + return pfr.descriptor.Kind() == protoreflect.EnumKind +} + +func (pfr *ProtobufFieldReflection) isStruct() bool { + return pfr.descriptor.Kind() == protoreflect.MessageKind && !pfr.descriptor.IsMap() && pfr.rValue.Kind() != reflect.Slice +} + +func (pfr *ProtobufFieldReflection) isMap() bool { + return pfr.descriptor.Kind() == protoreflect.MessageKind && pfr.descriptor.IsMap() +} + +func (pfr *ProtobufFieldReflection) isList() bool { + return pfr.descriptor.IsList() && pfr.rValue.Kind() == reflect.Slice +} + +// ProtobufMessageReflection represents the metadata and values of a protobuf message +type ProtobufMessageReflection struct { + descriptor protoreflect.MessageDescriptor + message protoreflect.Message + rValue reflect.Value + schemaOptions + fields []ProtobufMessageFieldReflection +} + +func (psr ProtobufMessageReflection) unmarshallAny() ProtobufMessageReflection { + if psr.descriptor.FullName() == "google.protobuf.Any" && psr.rValue.IsValid() { + for psr.rValue.Type().Kind() == reflect.Ptr { + psr.rValue = reflect.Indirect(psr.rValue) + } + fieldValueAsAny, _ := psr.rValue.Interface().(anypb.Any) + msg, _ := fieldValueAsAny.UnmarshalNew() + + v := reflect.ValueOf(msg) + for v.Kind() == reflect.Ptr { + v = reflect.Indirect(v) + } + + return ProtobufMessageReflection{ + descriptor: msg.ProtoReflect().Descriptor(), + message: msg.ProtoReflect(), + rValue: v, + schemaOptions: psr.schemaOptions, + } + } else { + return psr + } +} + +func (psr ProtobufMessageReflection) getArrowFields() []arrow.Field { + var fields []arrow.Field + + for pfr := range psr.generateStructFields() { + fields = append(fields, arrow.Field{ + Name: pfr.name(), + Type: pfr.getDataType(), + Nullable: true, + }) + } + + return fields +} + +type protobufListReflection struct { + ProtobufFieldReflection +} + +func (pfr *ProtobufFieldReflection) asList() protobufListReflection { + return protobufListReflection{*pfr} +} + +func (plr protobufListReflection) getDataType() arrow.DataType { + for li := range plr.generateListItems() { + return arrow.ListOf(li.getDataType()) + } + pfr := ProtobufFieldReflection{ + descriptor: plr.descriptor, + schemaOptions: plr.schemaOptions, + } + return arrow.ListOf(pfr.getDataType()) +} + +type protobufUnionReflection struct { + ProtobufFieldReflection +} + +func (pfr *ProtobufFieldReflection) asUnion() protobufUnionReflection { + return protobufUnionReflection{*pfr} +} + +func (pur protobufUnionReflection) isThisOne() bool { + for pur.rValue.Kind() == reflect.Ptr || pur.rValue.Kind() == reflect.Interface { + pur.rValue = pur.rValue.Elem() + } + return pur.rValue.Field(0).String() == pur.prValue.String() +} + +func (pur protobufUnionReflection) whichOne() arrow.UnionTypeCode { + fds := pur.descriptor.ContainingOneof().Fields() + for i := 0; i < fds.Len(); i++ { + pfr := pur.parent.getFieldByName(string(fds.Get(i).Name())) + if pfr.asUnion().isThisOne() { + return pur.getUnionTypeCode(int32(pfr.descriptor.Number())) + } + } + // i.e. 
all null + return -1 +} + +func (pur protobufUnionReflection) getField() *ProtobufFieldReflection { + fds := pur.descriptor.ContainingOneof().Fields() + for i := 0; i < fds.Len(); i++ { + pfr := pur.parent.getFieldByName(string(fds.Get(i).Name())) + if pfr.asUnion().isThisOne() { + return pfr + } + } + // i.e. all null + return nil +} + +func (pur protobufUnionReflection) getUnionTypeCode(n int32) arrow.UnionTypeCode { + //We use the index of the field number as there is a limit on the arrow.UnionTypeCode (127) + //which a protobuf Number could realistically exceed + fds := pur.descriptor.ContainingOneof().Fields() + for i := 0; i < fds.Len(); i++ { + if n == int32(fds.Get(i).Number()) { + return int8(i) + } + } + return -1 +} + +func (pur protobufUnionReflection) generateUnionFields() chan *ProtobufFieldReflection { + out := make(chan *ProtobufFieldReflection) + go func() { + defer close(out) + fds := pur.descriptor.ContainingOneof().Fields() + for i := 0; i < fds.Len(); i++ { + pfr := pur.parent.getFieldByName(string(fds.Get(i).Name())) + // Do not get stuck in a recursion loop + pfr.oneOfHandler = OneOfNull + if pfr.exclusionPolicy(pfr) { + continue + } + out <- pfr + } + }() + + return out +} + +func (pur protobufUnionReflection) getArrowFields() []arrow.Field { + var fields []arrow.Field + + for pfr := range pur.generateUnionFields() { + fields = append(fields, pfr.arrowField()) + } + + return fields +} + +func (pur protobufUnionReflection) getDataType() arrow.DataType { + fds := pur.getArrowFields() + typeCodes := make([]arrow.UnionTypeCode, len(fds)) + for i := 0; i < len(fds); i++ { + typeCodes[i] = arrow.UnionTypeCode(i) + } + return arrow.DenseUnionOf(fds, typeCodes) +} + +type protobufDictReflection struct { + ProtobufFieldReflection +} + +func (pfr *ProtobufFieldReflection) asDictionary() protobufDictReflection { + return protobufDictReflection{*pfr} +} + +func (pdr protobufDictReflection) getDataType() arrow.DataType { + return &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, + ValueType: arrow.BinaryTypes.String, + Ordered: false, + } +} + +func (pdr protobufDictReflection) getDictValues(mem memory.Allocator) arrow.Array { + enumValues := pdr.descriptor.Enum().Values() + bldr := array.NewStringBuilder(mem) + for i := 0; i < enumValues.Len(); i++ { + bldr.Append(string(enumValues.Get(i).Name())) + } + return bldr.NewArray() +} + +type protobufMapReflection struct { + ProtobufFieldReflection +} + +func (pfr *ProtobufFieldReflection) asMap() protobufMapReflection { + return protobufMapReflection{*pfr} +} + +func (pmr protobufMapReflection) getDataType() arrow.DataType { + for kvp := range pmr.generateKeyValuePairs() { + return kvp.getDataType() + } + return protobufMapKeyValuePairReflection{ + k: ProtobufFieldReflection{ + parent: pmr.parent, + descriptor: pmr.descriptor.MapKey(), + schemaOptions: pmr.schemaOptions, + }, + v: ProtobufFieldReflection{ + parent: pmr.parent, + descriptor: pmr.descriptor.MapValue(), + schemaOptions: pmr.schemaOptions, + }, + }.getDataType() +} + +type protobufMapKeyValuePairReflection struct { + k ProtobufFieldReflection + v ProtobufFieldReflection +} + +func (pmr protobufMapKeyValuePairReflection) getDataType() arrow.DataType { + return arrow.MapOf(pmr.k.getDataType(), pmr.v.getDataType()) +} + +func (pmr protobufMapReflection) generateKeyValuePairs() chan protobufMapKeyValuePairReflection { + out := make(chan protobufMapKeyValuePairReflection) + + go func() { + defer close(out) + for _, k := range pmr.rValue.MapKeys() { + kvp := 
protobufMapKeyValuePairReflection{ + k: ProtobufFieldReflection{ + parent: pmr.parent, + descriptor: pmr.descriptor.MapKey(), + prValue: getMapKey(k), + rValue: k, + schemaOptions: pmr.schemaOptions, + }, + v: ProtobufFieldReflection{ + parent: pmr.parent, + descriptor: pmr.descriptor.MapValue(), + prValue: pmr.prValue.Map().Get(protoreflect.MapKey(getMapKey(k))), + rValue: pmr.rValue.MapIndex(k), + schemaOptions: pmr.schemaOptions, + }, + } + out <- kvp + } + }() + + return out +} + +func getMapKey(v reflect.Value) protoreflect.Value { + switch v.Kind() { + case reflect.String: + return protoreflect.ValueOf(v.String()) + case reflect.Int32, reflect.Int64: + return protoreflect.ValueOf(v.Int()) + case reflect.Bool: + return protoreflect.ValueOf(v.Bool()) + case reflect.Uint32, reflect.Uint64: + return protoreflect.ValueOf(v.Uint()) + default: + panic("Unmapped protoreflect map key type") + } +} + +func (psr ProtobufMessageReflection) generateStructFields() chan *ProtobufFieldReflection { + out := make(chan *ProtobufFieldReflection) + + go func() { + defer close(out) + fds := psr.descriptor.Fields() + for i := 0; i < fds.Len(); i++ { + pfr := psr.getFieldByName(string(fds.Get(i).Name())) + if psr.exclusionPolicy(pfr) { + continue + } + if pfr.arrowType() == arrow.DENSE_UNION { + if pfr.descriptor.Number() != pfr.descriptor.ContainingOneof().Fields().Get(0).Number() { + continue + } + } + out <- pfr + } + }() + + return out +} + +func (psr ProtobufMessageReflection) generateFields() chan *ProtobufFieldReflection { + out := make(chan *ProtobufFieldReflection) + + go func() { + defer close(out) + fds := psr.descriptor.Fields() + for i := 0; i < fds.Len(); i++ { + pfr := psr.getFieldByName(string(fds.Get(i).Name())) + if psr.exclusionPolicy(pfr) { + continue + } + if pfr.arrowType() == arrow.DENSE_UNION { + if pfr.descriptor.Number() != pfr.descriptor.ContainingOneof().Fields().Get(0).Number() { + continue + } + } + out <- pfr + } + }() + + return out +} + +func (pfr *ProtobufFieldReflection) asStruct() ProtobufMessageReflection { + psr := ProtobufMessageReflection{ + descriptor: pfr.descriptor.Message(), + rValue: pfr.rValue, + schemaOptions: pfr.schemaOptions, + } + if pfr.prValue.IsValid() { + psr.message = pfr.prValue.Message() + } + psr = psr.unmarshallAny() + return psr +} + +func (psr ProtobufMessageReflection) getDataType() arrow.DataType { + return arrow.StructOf(psr.getArrowFields()...) 
+} + +func (psr ProtobufMessageReflection) getFieldByName(n string) *ProtobufFieldReflection { + fd := psr.descriptor.Fields().ByTextName(xstrings.ToSnakeCase(n)) + fv := psr.rValue + if fv.IsValid() { + if !fv.IsZero() { + for fv.Kind() == reflect.Ptr || fv.Kind() == reflect.Interface { + fv = fv.Elem() + } + if fd.ContainingOneof() != nil { + n = string(fd.ContainingOneof().Name()) + } + fv = fv.FieldByName(xstrings.ToCamelCase(n)) + for fv.Kind() == reflect.Ptr { + fv = fv.Elem() + } + } + } + pfr := ProtobufFieldReflection{ + parent: &psr, + descriptor: fd, + rValue: fv, + schemaOptions: psr.schemaOptions, + } + if psr.message != nil { + pfr.prValue = psr.message.Get(fd) + } + return &pfr +} + +func (plr protobufListReflection) generateListItems() chan ProtobufFieldReflection { + out := make(chan ProtobufFieldReflection) + + go func() { + defer close(out) + for i := 0; i < plr.prValue.List().Len(); i++ { + out <- ProtobufFieldReflection{ + descriptor: plr.descriptor, + prValue: plr.prValue.List().Get(i), + rValue: plr.rValue.Index(i), + schemaOptions: plr.schemaOptions, + } + } + }() + + return out +} + +func (pfr *ProtobufFieldReflection) getDataType() arrow.DataType { + switch pfr.arrowType() { + case arrow.DENSE_UNION: + return pfr.asUnion().getDataType() + case arrow.DICTIONARY: + return pfr.asDictionary().getDataType() + case arrow.LIST: + return pfr.asList().getDataType() + case arrow.MAP: + return pfr.asMap().getDataType() + case arrow.STRUCT: + return pfr.asStruct().getDataType() + case arrow.INT32: + return arrow.PrimitiveTypes.Int32 + case arrow.INT64: + return arrow.PrimitiveTypes.Int64 + case arrow.UINT32: + return arrow.PrimitiveTypes.Uint32 + case arrow.UINT64: + return arrow.PrimitiveTypes.Uint64 + case arrow.FLOAT32: + return arrow.PrimitiveTypes.Float32 + case arrow.FLOAT64: + return arrow.PrimitiveTypes.Float64 + case arrow.STRING: + return arrow.BinaryTypes.String + case arrow.BINARY: + return arrow.BinaryTypes.Binary + case arrow.BOOL: + return arrow.FixedWidthTypes.Boolean + } + return nil +} + +type protobufReflection interface { + name() string + arrowType() arrow.Type + protoreflectValue() protoreflect.Value + reflectValue() reflect.Value + GetDescriptor() protoreflect.FieldDescriptor + isNull() bool + isEnum() bool + asDictionary() protobufDictReflection + isList() bool + asList() protobufListReflection + isMap() bool + asMap() protobufMapReflection + isStruct() bool + asStruct() ProtobufMessageReflection + isOneOf() bool + asUnion() protobufUnionReflection +} + +// ProtobufMessageFieldReflection links together the message and it's fields +type ProtobufMessageFieldReflection struct { + parent *ProtobufMessageReflection + protobufReflection + arrow.Field +} + +// Schema returns an arrow.Schema representing a protobuf message +func (msg ProtobufMessageReflection) Schema() *arrow.Schema { + var fields []arrow.Field + for _, f := range msg.fields { + fields = append(fields, f.Field) + } + return arrow.NewSchema(fields, nil) +} + +// Record returns an arrow.Record for a protobuf message +func (msg ProtobufMessageReflection) Record(mem memory.Allocator) arrow.Record { + if mem == nil { + mem = memory.NewGoAllocator() + } + + schema := msg.Schema() + + recordBuilder := array.NewRecordBuilder(mem, schema) + + var fieldNames []string + for i, f := range msg.fields { + f.AppendValueOrNull(recordBuilder.Field(i), mem) + fieldNames = append(fieldNames, f.protobufReflection.name()) + } + + var arrays []arrow.Array + for _, bldr := range recordBuilder.Fields() { + a := 
bldr.NewArray() + arrays = append(arrays, a) + } + + structArray, _ := array.NewStructArray(arrays, fieldNames) + + return array.RecordFromStructArray(structArray, schema) +} + +// NewProtobufMessageReflection initialises a ProtobufMessageReflection +// can be used to convert a protobuf message into an arrow Record +func NewProtobufMessageReflection(msg proto.Message, options ...option) *ProtobufMessageReflection { + v := reflect.ValueOf(msg) + for v.Kind() == reflect.Ptr { + v = v.Elem() + } + includeAll := func(pfr *ProtobufFieldReflection) bool { + return false + } + noFormatting := func(str string) string { + return str + } + psr := &ProtobufMessageReflection{ + descriptor: msg.ProtoReflect().Descriptor(), + message: msg.ProtoReflect(), + rValue: v, + schemaOptions: schemaOptions{ + exclusionPolicy: includeAll, + fieldNameFormatter: noFormatting, + oneOfHandler: OneOfNull, + enumHandler: EnumDictionary, + }, + } + + for _, opt := range options { + opt(psr) + } + + var fields []ProtobufMessageFieldReflection + + for pfr := range psr.generateFields() { + fields = append(fields, ProtobufMessageFieldReflection{ + parent: psr, + protobufReflection: pfr, + Field: pfr.arrowField(), + }) + } + + psr.fields = fields + + return psr +} + +type option func(*ProtobufMessageReflection) + +// WithExclusionPolicy is an option for a ProtobufMessageReflection +// WithExclusionPolicy acts as a deny filter on the fields of a protobuf message +// i.e. prevents them from being included in the schema. +// A use case for this is to exclude fields containing PII. +func WithExclusionPolicy(ex func(pfr *ProtobufFieldReflection) bool) option { + return func(psr *ProtobufMessageReflection) { + psr.exclusionPolicy = ex + } +} + +// WithFieldNameFormatter is an option for a ProtobufMessageReflection +// WithFieldNameFormatter enables customisation of the field names in the arrow schema +// By default, the field names are taken from the protobuf message (.proto file) +func WithFieldNameFormatter(formatter func(str string) string) option { + return func(psr *ProtobufMessageReflection) { + psr.fieldNameFormatter = formatter + } +} + +// WithOneOfHandler is an option for a ProtobufMessageReflection +// WithOneOfHandler enables customisation of the protobuf oneOf type in the arrow schema +// By default, the oneOfs are mapped to separate columns +func WithOneOfHandler(oneOfHandler ProtobufTypeHandler) option { + return func(psr *ProtobufMessageReflection) { + psr.oneOfHandler = oneOfHandler + } +} + +// WithEnumHandler is an option for a ProtobufMessageReflection +// WithEnumHandler enables customisation of the protobuf Enum type in the arrow schema +// By default, the Enums are mapped to arrow.Dictionary +func WithEnumHandler(enumHandler ProtobufTypeHandler) option { + return func(psr *ProtobufMessageReflection) { + psr.enumHandler = enumHandler + } +} + +// AppendValueOrNull add the value of a protobuf field to an arrow array builder +func (f ProtobufMessageFieldReflection) AppendValueOrNull(b array.Builder, mem memory.Allocator) error { + pv := f.protoreflectValue() + fd := f.GetDescriptor() + + if f.isNull() { + b.AppendNull() + return nil + } + + switch b.Type().ID() { + case arrow.STRING: + if f.protobufReflection.isEnum() { + b.(*array.StringBuilder).Append(string(fd.Enum().Values().ByNumber(pv.Enum()).Name())) + } else { + b.(*array.StringBuilder).Append(pv.String()) + } + case arrow.BINARY: + b.(*array.BinaryBuilder).Append(pv.Bytes()) + case arrow.INT32: + if f.protobufReflection.isEnum() { + 
b.(*array.Int32Builder).Append(int32(f.reflectValue().Int())) + } else { + b.(*array.Int32Builder).Append(int32(pv.Int())) + } + case arrow.INT64: + b.(*array.Int64Builder).Append(pv.Int()) + case arrow.FLOAT64: + b.(*array.Float64Builder).Append(pv.Float()) + case arrow.UINT32: + b.(*array.Uint32Builder).Append(uint32(pv.Uint())) + case arrow.UINT64: + b.(*array.Uint64Builder).Append(pv.Uint()) + case arrow.BOOL: + b.(*array.BooleanBuilder).Append(pv.Bool()) + case arrow.DENSE_UNION: + ub := b.(array.UnionBuilder) + pur := f.asUnion() + if pur.whichOne() == -1 { + ub.AppendNull() + break + } + ub.Append(pur.whichOne()) + cb := ub.Child(int(pur.whichOne())) + err := ProtobufMessageFieldReflection{ + parent: f.parent, + protobufReflection: pur.getField(), + Field: pur.arrowField(), + }.AppendValueOrNull(cb, mem) + if err != nil { + return err + } + case arrow.DICTIONARY: + pdr := f.asDictionary() + db := b.(*array.BinaryDictionaryBuilder) + err := db.InsertStringDictValues(pdr.getDictValues(mem).(*array.String)) + if err != nil { + return err + } + enumNum := int(f.reflectValue().Int()) + enumVal := fd.Enum().Values().ByNumber(protoreflect.EnumNumber(enumNum)).Name() + err = db.AppendValueFromString(string(enumVal)) + if err != nil { + return err + } + case arrow.STRUCT: + sb := b.(*array.StructBuilder) + sb.Append(true) + child := ProtobufMessageFieldReflection{ + parent: f.parent, + } + for i, field := range f.Field.Type.(*arrow.StructType).Fields() { + child.protobufReflection = f.asStruct().getFieldByName(field.Name) + child.Field = field + err := child.AppendValueOrNull(sb.FieldBuilder(i), mem) + if err != nil { + return err + } + } + case arrow.LIST: + lb := b.(*array.ListBuilder) + l := pv.List().Len() + if l == 0 { + lb.AppendEmptyValue() + break + } + lb.ValueBuilder().Reserve(l) + lb.Append(true) + child := ProtobufMessageFieldReflection{ + parent: f.parent, + Field: f.Field.Type.(*arrow.ListType).ElemField(), + } + for li := range f.asList().generateListItems() { + child.protobufReflection = &li + err := child.AppendValueOrNull(lb.ValueBuilder(), mem) + if err != nil { + return err + } + } + case arrow.MAP: + mb := b.(*array.MapBuilder) + l := pv.Map().Len() + if l == 0 { + mb.AppendEmptyValue() + break + } + mb.KeyBuilder().Reserve(l) + mb.ItemBuilder().Reserve(l) + mb.Append(true) + k := ProtobufMessageFieldReflection{ + parent: f.parent, + Field: f.Field.Type.(*arrow.MapType).KeyField(), + } + v := ProtobufMessageFieldReflection{ + parent: f.parent, + Field: f.Field.Type.(*arrow.MapType).ItemField(), + } + for kvp := range f.asMap().generateKeyValuePairs() { + k.protobufReflection = &kvp.k + err := k.AppendValueOrNull(mb.KeyBuilder(), mem) + if err != nil { + return err + } + v.protobufReflection = &kvp.v + err = v.AppendValueOrNull(mb.ItemBuilder(), mem) + if err != nil { + return err + } + } + default: + return fmt.Errorf("not able to appendValueOrNull for type %s", b.Type().ID()) + } + return nil +} diff --git a/go/arrow/util/protobuf_reflect_test.go b/go/arrow/util/protobuf_reflect_test.go new file mode 100644 index 0000000000000..ab3cbdf9a6b13 --- /dev/null +++ b/go/arrow/util/protobuf_reflect_test.go @@ -0,0 +1,311 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util
+
+import (
+    "strings"
+    "testing"
+
+    "github.com/apache/arrow/go/v17/arrow"
+    "github.com/apache/arrow/go/v17/arrow/array"
+    "github.com/apache/arrow/go/v17/arrow/memory"
+    "github.com/apache/arrow/go/v17/arrow/util/util_message"
+    "github.com/huandu/xstrings"
+    "github.com/stretchr/testify/assert"
+    "github.com/stretchr/testify/require"
+    "google.golang.org/protobuf/types/known/anypb"
+)
+
+func SetupTest() util_message.AllTheTypes {
+    msg := util_message.ExampleMessage{
+        Field1: "Example",
+    }
+
+    anyMsg, _ := anypb.New(&msg)
+
+    return util_message.AllTheTypes{
+        Str:      "Hello",
+        Int32:    10,
+        Int64:    100,
+        Sint32:   -10,
+        Sin64:    -100,
+        Uint32:   10,
+        Uint64:   100,
+        Fixed32:  10,
+        Fixed64:  1000,
+        Sfixed32: 10,
+        Bool:     false,
+        Bytes:    []byte("Hello, world!"),
+        Double:   1.1,
+        Enum:     util_message.AllTheTypes_OPTION_1,
+        Message:  &msg,
+        Oneof:    &util_message.AllTheTypes_Oneofstring{Oneofstring: "World"},
+        Any:      anyMsg,
+        //Breaks the test as the Golang maps have a non-deterministic order
+        //SimpleMap: map[int32]string{99: "Hello", 100: "World", 98: "How", 101: "Are", 1: "You"},
+        SimpleMap:   map[int32]string{99: "Hello"},
+        ComplexMap:  map[string]*util_message.ExampleMessage{"complex": &msg},
+        SimpleList:  []string{"Hello", "World"},
+        ComplexList: []*util_message.ExampleMessage{&msg},
+    }
+}
+
+func TestGetSchema(t *testing.T) {
+    msg := SetupTest()
+
+    got := NewProtobufMessageReflection(&msg).Schema().String()
+    want := `schema:
+  fields: 22
+    - str: type=utf8, nullable
+    - int32: type=int32, nullable
+    - int64: type=int64, nullable
+    - sint32: type=int32, nullable
+    - sin64: type=int64, nullable
+    - uint32: type=uint32, nullable
+    - uint64: type=uint64, nullable
+    - fixed32: type=uint32, nullable
+    - fixed64: type=uint64, nullable
+    - sfixed32: type=int32, nullable
+    - bool: type=bool, nullable
+    - bytes: type=binary, nullable
+    - double: type=float64, nullable
+    - enum: type=dictionary<values=utf8, indices=int32, ordered=false>, nullable
+    - message: type=struct<field1: utf8>, nullable
+    - oneofstring: type=utf8, nullable
+    - oneofmessage: type=struct<field1: utf8>, nullable
+    - any: type=struct<field1: utf8>, nullable
+    - simple_map: type=map<int32, utf8, items_nullable>, nullable
+    - complex_map: type=map<utf8, struct<field1: utf8>, items_nullable>, nullable
+    - simple_list: type=list<item: utf8, nullable>, nullable
+    - complex_list: type=list<item: struct<field1: utf8>, nullable>, nullable`
+
+    require.Equal(t, want, got, "got: %s\nwant: %s", got, want)
+
+    got = NewProtobufMessageReflection(&msg, WithOneOfHandler(OneOfDenseUnion)).Schema().String()
+    want = `schema:
+  fields: 21
+    - str: type=utf8, nullable
+    - int32: type=int32, nullable
+    - int64: type=int64, nullable
+    - sint32: type=int32, nullable
+    - sin64: type=int64, nullable
+    - uint32: type=uint32, nullable
+    - uint64: type=uint64, nullable
+    - fixed32: type=uint32, nullable
+    - fixed64: type=uint64, nullable
+    - sfixed32: type=int32, nullable
+    - bool: type=bool, nullable
+    - bytes: type=binary, nullable
+    - double: type=float64, nullable
+    - enum: type=dictionary<values=utf8, indices=int32, ordered=false>, nullable
+    - message: type=struct<field1: utf8>, nullable
+    - oneof: type=dense_union<oneofstring: type=utf8, nullable=0, oneofmessage: type=struct<field1: utf8>, nullable=1>, nullable
+    - any: type=struct<field1: utf8>, nullable
+    - simple_map: type=map<int32, utf8, items_nullable>, nullable
+    - complex_map: type=map<utf8, struct<field1: utf8>, items_nullable>, nullable
+    - simple_list: type=list<item: utf8, nullable>, nullable
+    - complex_list: type=list<item: struct<field1: utf8>, nullable>, nullable`
+
+    require.Equal(t, want, got, "got: %s\nwant: %s", got, want)
+
+    excludeComplex := func(pfr *ProtobufFieldReflection) bool {
+        return pfr.isMap() || pfr.isList() || pfr.isStruct()
+    }
+
+    got = NewProtobufMessageReflection(&msg, WithExclusionPolicy(excludeComplex)).Schema().String()
+    want = `schema:
+  fields: 15
+    - str: type=utf8, nullable
+    - int32: type=int32, nullable
+    - int64: type=int64, nullable
+    - sint32: type=int32, nullable
+    - sin64: type=int64, nullable
+    - uint32: type=uint32, nullable
+    - uint64: type=uint64, nullable
+    - fixed32: type=uint32, nullable
+    - fixed64: type=uint64, nullable
+    - sfixed32: type=int32, nullable
+    - bool: type=bool, nullable
+    - bytes: type=binary, nullable
+    - double: type=float64, nullable
+    - enum: type=dictionary<values=utf8, indices=int32, ordered=false>, nullable
+    - oneofstring: type=utf8, nullable`
+
+    require.Equal(t, want, got, "got: %s\nwant: %s", got, want)
+
+    got = NewProtobufMessageReflection(
+        &msg,
+        WithExclusionPolicy(excludeComplex),
+        WithFieldNameFormatter(xstrings.ToCamelCase),
+    ).Schema().String()
+    want = `schema:
+  fields: 15
+    - Str: type=utf8, nullable
+    - Int32: type=int32, nullable
+    - Int64: type=int64, nullable
+    - Sint32: type=int32, nullable
+    - Sin64: type=int64, nullable
+    - Uint32: type=uint32, nullable
+    - Uint64: type=uint64, nullable
+    - Fixed32: type=uint32, nullable
+    - Fixed64: type=uint64, nullable
+    - Sfixed32: type=int32, nullable
+    - Bool: type=bool, nullable
+    - Bytes: type=binary, nullable
+    - Double: type=float64, nullable
+    - Enum: type=dictionary<values=utf8, indices=int32, ordered=false>, nullable
+    - Oneofstring: type=utf8, nullable`
+
+    require.Equal(t, want, got, "got: %s\nwant: %s", got, want)
+
+    onlyEnum := func(pfr *ProtobufFieldReflection) bool {
+        return !pfr.isEnum()
+    }
+    got = NewProtobufMessageReflection(
+        &msg,
+        WithExclusionPolicy(onlyEnum),
+        WithEnumHandler(EnumNumber),
+    ).Schema().String()
+    want = `schema:
+  fields: 1
+    - enum: type=int32, nullable`
+
+    require.Equal(t, want, got, "got: %s\nwant: %s", got, want)
+
+    got = NewProtobufMessageReflection(
+        &msg,
+        WithExclusionPolicy(onlyEnum),
+        WithEnumHandler(EnumValue),
+    ).Schema().String()
+    want = `schema:
+  fields: 1
+    - enum: type=utf8, nullable`
+
+    require.Equal(t, want, got, "got: %s\nwant: %s", got, want)
+}
+
+func TestRecordFromProtobuf(t *testing.T) {
+    msg := SetupTest()
+
+    pmr := NewProtobufMessageReflection(&msg, WithOneOfHandler(OneOfDenseUnion))
+    schema := pmr.Schema()
+    got := pmr.Record(nil)
+    jsonStr := `[
+    {
+        "str":"Hello",
+        "int32":10,
+        "int64":100,
+        "sint32":-10,
+        "sin64":-100,
+        "uint32":10,
+        "uint64":100,
+        "fixed32":10,
+        "fixed64":1000,
+        "sfixed32":10,
+        "bool":false,
+        "bytes":"SGVsbG8sIHdvcmxkIQ==",
+        "double":1.1,
+        "enum":"OPTION_1",
+        "message":{"field1":"Example"},
+        "oneof": [0, "World"],
+        "any":{"field1":"Example"},
+        "simple_map":[{"key":99,"value":"Hello"}],
+        "complex_map":[{"key":"complex","value":{"field1":"Example"}}],
+        "simple_list":["Hello","World"],
+        "complex_list":[{"field1":"Example"}]
+    }
+    ]`
+    want, _, err := array.RecordFromJSON(memory.NewGoAllocator(), schema, strings.NewReader(jsonStr))
+
+    require.NoError(t, err)
+    require.EqualExportedValues(t, got, want, "got: %s\nwant: %s", got, want)
+
+    onlyEnum := func(pfr *ProtobufFieldReflection) bool { return !pfr.isEnum() }
+    pmr =
NewProtobufMessageReflection(&msg, WithExclusionPolicy(onlyEnum), WithEnumHandler(EnumValue)) + got = pmr.Record(nil) + jsonStr = `[ { "enum":"OPTION_1" } ]` + want, _, err = array.RecordFromJSON(memory.NewGoAllocator(), pmr.Schema(), strings.NewReader(jsonStr)) + require.NoError(t, err) + require.True(t, array.RecordEqual(got, want), "got: %s\nwant: %s", got, want) + + pmr = NewProtobufMessageReflection(&msg, WithExclusionPolicy(onlyEnum), WithEnumHandler(EnumNumber)) + got = pmr.Record(nil) + jsonStr = `[ { "enum":"1" } ]` + want, _, err = array.RecordFromJSON(memory.NewGoAllocator(), pmr.Schema(), strings.NewReader(jsonStr)) + require.NoError(t, err) + require.True(t, array.RecordEqual(got, want), "got: %s\nwant: %s", got, want) +} + +func TestNullRecordFromProtobuf(t *testing.T) { + pmr := NewProtobufMessageReflection(&util_message.AllTheTypes{}) + schema := pmr.Schema() + got := pmr.Record(nil) + _, _ = got.MarshalJSON() + jsonStr := `[ + { + "str":"", + "int32":0, + "int64":0, + "sint32":0, + "sin64":0, + "uint32":0, + "uint64":0, + "fixed32":0, + "fixed64":0, + "sfixed32":0, + "bool":false, + "bytes":"", + "double":0, + "enum":"OPTION_0", + "message":null, + "oneofmessage":{"field1":""}, + "oneofstring":"", + "any":null, + "simple_map":[], + "complex_map":[], + "simple_list":[], + "complex_list":[] + } + ]` + + want, _, err := array.RecordFromJSON(memory.NewGoAllocator(), schema, strings.NewReader(jsonStr)) + + require.NoError(t, err) + require.EqualExportedValues(t, got, want, "got: %s\nwant: %s", got, want) +} + +type testProtobufReflection struct { + ProtobufFieldReflection +} + +func (tpr testProtobufReflection) isNull() bool { + return false +} + +func TestAppendValueOrNull(t *testing.T) { + unsupportedField := arrow.Field{Name: "Test", Type: arrow.FixedWidthTypes.Time32s} + schema := arrow.NewSchema([]arrow.Field{unsupportedField}, nil) + mem := memory.NewGoAllocator() + recordBuilder := array.NewRecordBuilder(mem, schema) + pmfr := ProtobufMessageFieldReflection{ + protobufReflection: &testProtobufReflection{}, + Field: arrow.Field{Name: "Test", Type: arrow.FixedWidthTypes.Time32s}, + } + got := pmfr.AppendValueOrNull(recordBuilder.Field(0), mem) + want := "not able to appendValueOrNull for type TIME32" + assert.EqualErrorf(t, got, want, "Error is: %v, want: %v", got, want) +} diff --git a/go/arrow/util/util_message/types.pb.go b/go/arrow/util/util_message/types.pb.go new file mode 100644 index 0000000000000..80e18847c1970 --- /dev/null +++ b/go/arrow/util/util_message/types.pb.go @@ -0,0 +1,539 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by protoc-gen-go. DO NOT EDIT. 
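
The tests above double as usage documentation for the exported surface: the `With*` options compose, and `Record(nil)` falls back to `memory.NewGoAllocator()`. The sketch below pulls those pieces together from outside the `util` package; the exclusion predicate is invented for illustration and is not part of the patch:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/apache/arrow/go/v17/arrow/util"
	"github.com/apache/arrow/go/v17/arrow/util/util_message"
)

func main() {
	msg := util_message.AllTheTypes{Str: "Hello", Int32: 10}

	// Hypothetical deny filter: exclude any field whose protobuf name
	// contains "fixed" (the same mechanism could hide PII columns).
	noFixed := func(pfr *util.ProtobufFieldReflection) bool {
		return strings.Contains(string(pfr.GetDescriptor().Name()), "fixed")
	}

	pmr := util.NewProtobufMessageReflection(
		&msg,
		util.WithExclusionPolicy(noFixed),
		util.WithOneOfHandler(util.OneOfDenseUnion),
		util.WithEnumHandler(util.EnumValue),
	)

	rec := pmr.Record(nil) // nil allocator defaults to memory.NewGoAllocator()
	defer rec.Release()
	fmt.Println(rec)
}
```
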
+// versions: +// protoc-gen-go v1.31.0 +// protoc v4.24.4 +// source: messages/types.proto + +package util_message + +import ( + reflect "reflect" + sync "sync" + + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + anypb "google.golang.org/protobuf/types/known/anypb" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type AllTheTypes_ExampleEnum int32 + +const ( + AllTheTypes_OPTION_0 AllTheTypes_ExampleEnum = 0 + AllTheTypes_OPTION_1 AllTheTypes_ExampleEnum = 1 +) + +// Enum value maps for AllTheTypes_ExampleEnum. +var ( + AllTheTypes_ExampleEnum_name = map[int32]string{ + 0: "OPTION_0", + 1: "OPTION_1", + } + AllTheTypes_ExampleEnum_value = map[string]int32{ + "OPTION_0": 0, + "OPTION_1": 1, + } +) + +func (x AllTheTypes_ExampleEnum) Enum() *AllTheTypes_ExampleEnum { + p := new(AllTheTypes_ExampleEnum) + *p = x + return p +} + +func (x AllTheTypes_ExampleEnum) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (AllTheTypes_ExampleEnum) Descriptor() protoreflect.EnumDescriptor { + return file_messages_types_proto_enumTypes[0].Descriptor() +} + +func (AllTheTypes_ExampleEnum) Type() protoreflect.EnumType { + return &file_messages_types_proto_enumTypes[0] +} + +func (x AllTheTypes_ExampleEnum) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use AllTheTypes_ExampleEnum.Descriptor instead. +func (AllTheTypes_ExampleEnum) EnumDescriptor() ([]byte, []int) { + return file_messages_types_proto_rawDescGZIP(), []int{1, 0} +} + +type ExampleMessage struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Field1 string `protobuf:"bytes,1,opt,name=field1,proto3" json:"field1,omitempty"` +} + +func (x *ExampleMessage) Reset() { + *x = ExampleMessage{} + if protoimpl.UnsafeEnabled { + mi := &file_messages_types_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ExampleMessage) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ExampleMessage) ProtoMessage() {} + +func (x *ExampleMessage) ProtoReflect() protoreflect.Message { + mi := &file_messages_types_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ExampleMessage.ProtoReflect.Descriptor instead. 
+func (*ExampleMessage) Descriptor() ([]byte, []int) { + return file_messages_types_proto_rawDescGZIP(), []int{0} +} + +func (x *ExampleMessage) GetField1() string { + if x != nil { + return x.Field1 + } + return "" +} + +type AllTheTypes struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Str string `protobuf:"bytes,1,opt,name=str,proto3" json:"str,omitempty"` + Int32 int32 `protobuf:"varint,2,opt,name=int32,proto3" json:"int32,omitempty"` + Int64 int64 `protobuf:"varint,3,opt,name=int64,proto3" json:"int64,omitempty"` + Sint32 int32 `protobuf:"zigzag32,4,opt,name=sint32,proto3" json:"sint32,omitempty"` + Sin64 int64 `protobuf:"zigzag64,5,opt,name=sin64,proto3" json:"sin64,omitempty"` + Uint32 uint32 `protobuf:"varint,6,opt,name=uint32,proto3" json:"uint32,omitempty"` + Uint64 uint64 `protobuf:"varint,7,opt,name=uint64,proto3" json:"uint64,omitempty"` + Fixed32 uint32 `protobuf:"fixed32,8,opt,name=fixed32,proto3" json:"fixed32,omitempty"` + Fixed64 uint64 `protobuf:"fixed64,9,opt,name=fixed64,proto3" json:"fixed64,omitempty"` + Sfixed32 int32 `protobuf:"fixed32,10,opt,name=sfixed32,proto3" json:"sfixed32,omitempty"` + Bool bool `protobuf:"varint,11,opt,name=bool,proto3" json:"bool,omitempty"` + Bytes []byte `protobuf:"bytes,12,opt,name=bytes,proto3" json:"bytes,omitempty"` + Double float64 `protobuf:"fixed64,13,opt,name=double,proto3" json:"double,omitempty"` + Enum AllTheTypes_ExampleEnum `protobuf:"varint,14,opt,name=enum,proto3,enum=AllTheTypes_ExampleEnum" json:"enum,omitempty"` + Message *ExampleMessage `protobuf:"bytes,15,opt,name=message,proto3" json:"message,omitempty"` + // Types that are assignable to Oneof: + // + // *AllTheTypes_Oneofstring + // *AllTheTypes_Oneofmessage + Oneof isAllTheTypes_Oneof `protobuf_oneof:"oneof"` + Any *anypb.Any `protobuf:"bytes,18,opt,name=any,proto3" json:"any,omitempty"` + SimpleMap map[int32]string `protobuf:"bytes,19,rep,name=simple_map,json=simpleMap,proto3" json:"simple_map,omitempty" protobuf_key:"varint,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + ComplexMap map[string]*ExampleMessage `protobuf:"bytes,20,rep,name=complex_map,json=complexMap,proto3" json:"complex_map,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + SimpleList []string `protobuf:"bytes,21,rep,name=simple_list,json=simpleList,proto3" json:"simple_list,omitempty"` + ComplexList []*ExampleMessage `protobuf:"bytes,22,rep,name=complex_list,json=complexList,proto3" json:"complex_list,omitempty"` +} + +func (x *AllTheTypes) Reset() { + *x = AllTheTypes{} + if protoimpl.UnsafeEnabled { + mi := &file_messages_types_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *AllTheTypes) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AllTheTypes) ProtoMessage() {} + +func (x *AllTheTypes) ProtoReflect() protoreflect.Message { + mi := &file_messages_types_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AllTheTypes.ProtoReflect.Descriptor instead. 
+func (*AllTheTypes) Descriptor() ([]byte, []int) { + return file_messages_types_proto_rawDescGZIP(), []int{1} +} + +func (x *AllTheTypes) GetStr() string { + if x != nil { + return x.Str + } + return "" +} + +func (x *AllTheTypes) GetInt32() int32 { + if x != nil { + return x.Int32 + } + return 0 +} + +func (x *AllTheTypes) GetInt64() int64 { + if x != nil { + return x.Int64 + } + return 0 +} + +func (x *AllTheTypes) GetSint32() int32 { + if x != nil { + return x.Sint32 + } + return 0 +} + +func (x *AllTheTypes) GetSin64() int64 { + if x != nil { + return x.Sin64 + } + return 0 +} + +func (x *AllTheTypes) GetUint32() uint32 { + if x != nil { + return x.Uint32 + } + return 0 +} + +func (x *AllTheTypes) GetUint64() uint64 { + if x != nil { + return x.Uint64 + } + return 0 +} + +func (x *AllTheTypes) GetFixed32() uint32 { + if x != nil { + return x.Fixed32 + } + return 0 +} + +func (x *AllTheTypes) GetFixed64() uint64 { + if x != nil { + return x.Fixed64 + } + return 0 +} + +func (x *AllTheTypes) GetSfixed32() int32 { + if x != nil { + return x.Sfixed32 + } + return 0 +} + +func (x *AllTheTypes) GetBool() bool { + if x != nil { + return x.Bool + } + return false +} + +func (x *AllTheTypes) GetBytes() []byte { + if x != nil { + return x.Bytes + } + return nil +} + +func (x *AllTheTypes) GetDouble() float64 { + if x != nil { + return x.Double + } + return 0 +} + +func (x *AllTheTypes) GetEnum() AllTheTypes_ExampleEnum { + if x != nil { + return x.Enum + } + return AllTheTypes_OPTION_0 +} + +func (x *AllTheTypes) GetMessage() *ExampleMessage { + if x != nil { + return x.Message + } + return nil +} + +func (m *AllTheTypes) GetOneof() isAllTheTypes_Oneof { + if m != nil { + return m.Oneof + } + return nil +} + +func (x *AllTheTypes) GetOneofstring() string { + if x, ok := x.GetOneof().(*AllTheTypes_Oneofstring); ok { + return x.Oneofstring + } + return "" +} + +func (x *AllTheTypes) GetOneofmessage() *ExampleMessage { + if x, ok := x.GetOneof().(*AllTheTypes_Oneofmessage); ok { + return x.Oneofmessage + } + return nil +} + +func (x *AllTheTypes) GetAny() *anypb.Any { + if x != nil { + return x.Any + } + return nil +} + +func (x *AllTheTypes) GetSimpleMap() map[int32]string { + if x != nil { + return x.SimpleMap + } + return nil +} + +func (x *AllTheTypes) GetComplexMap() map[string]*ExampleMessage { + if x != nil { + return x.ComplexMap + } + return nil +} + +func (x *AllTheTypes) GetSimpleList() []string { + if x != nil { + return x.SimpleList + } + return nil +} + +func (x *AllTheTypes) GetComplexList() []*ExampleMessage { + if x != nil { + return x.ComplexList + } + return nil +} + +type isAllTheTypes_Oneof interface { + isAllTheTypes_Oneof() +} + +type AllTheTypes_Oneofstring struct { + Oneofstring string `protobuf:"bytes,16,opt,name=oneofstring,proto3,oneof"` +} + +type AllTheTypes_Oneofmessage struct { + Oneofmessage *ExampleMessage `protobuf:"bytes,17,opt,name=oneofmessage,proto3,oneof"` +} + +func (*AllTheTypes_Oneofstring) isAllTheTypes_Oneof() {} + +func (*AllTheTypes_Oneofmessage) isAllTheTypes_Oneof() {} + +var File_messages_types_proto protoreflect.FileDescriptor + +var file_messages_types_proto_rawDesc = []byte{ + 0x0a, 0x14, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x19, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x61, 0x6e, 0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x22, 0x28, 0x0a, 0x0e, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x4d, 
0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x31, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x31, 0x22, 0xa9, 0x07, 0x0a, 0x0b, + 0x41, 0x6c, 0x6c, 0x54, 0x68, 0x65, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x73, + 0x74, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x74, 0x72, 0x12, 0x14, 0x0a, + 0x05, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x05, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x18, 0x04, 0x20, 0x01, 0x28, 0x11, 0x52, 0x06, 0x73, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x69, 0x6e, 0x36, 0x34, 0x18, 0x05, 0x20, 0x01, 0x28, 0x12, + 0x52, 0x05, 0x73, 0x69, 0x6e, 0x36, 0x34, 0x12, 0x16, 0x0a, 0x06, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x12, + 0x16, 0x0a, 0x06, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x18, 0x07, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x06, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x12, 0x18, 0x0a, 0x07, 0x66, 0x69, 0x78, 0x65, 0x64, + 0x33, 0x32, 0x18, 0x08, 0x20, 0x01, 0x28, 0x07, 0x52, 0x07, 0x66, 0x69, 0x78, 0x65, 0x64, 0x33, + 0x32, 0x12, 0x18, 0x0a, 0x07, 0x66, 0x69, 0x78, 0x65, 0x64, 0x36, 0x34, 0x18, 0x09, 0x20, 0x01, + 0x28, 0x06, 0x52, 0x07, 0x66, 0x69, 0x78, 0x65, 0x64, 0x36, 0x34, 0x12, 0x1a, 0x0a, 0x08, 0x73, + 0x66, 0x69, 0x78, 0x65, 0x64, 0x33, 0x32, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0f, 0x52, 0x08, 0x73, + 0x66, 0x69, 0x78, 0x65, 0x64, 0x33, 0x32, 0x12, 0x12, 0x0a, 0x04, 0x62, 0x6f, 0x6f, 0x6c, 0x18, + 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x62, 0x6f, 0x6f, 0x6c, 0x12, 0x14, 0x0a, 0x05, 0x62, + 0x79, 0x74, 0x65, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x62, 0x79, 0x74, 0x65, + 0x73, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x18, 0x0d, 0x20, 0x01, 0x28, + 0x01, 0x52, 0x06, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x12, 0x2c, 0x0a, 0x04, 0x65, 0x6e, 0x75, + 0x6d, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x41, 0x6c, 0x6c, 0x54, 0x68, 0x65, + 0x54, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x45, 0x6e, 0x75, + 0x6d, 0x52, 0x04, 0x65, 0x6e, 0x75, 0x6d, 0x12, 0x29, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, + 0x67, 0x65, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x45, 0x78, 0x61, 0x6d, 0x70, + 0x6c, 0x65, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, + 0x67, 0x65, 0x12, 0x22, 0x0a, 0x0b, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x73, 0x74, 0x72, 0x69, 0x6e, + 0x67, 0x18, 0x10, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x0b, 0x6f, 0x6e, 0x65, 0x6f, 0x66, + 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x35, 0x0a, 0x0c, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x6d, + 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x45, + 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x48, 0x00, 0x52, + 0x0c, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x26, 0x0a, + 0x03, 0x61, 0x6e, 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x41, 0x6e, 0x79, + 0x52, 0x03, 0x61, 0x6e, 0x79, 0x12, 0x3a, 0x0a, 0x0a, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x5f, + 0x6d, 
0x61, 0x70, 0x18, 0x13, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x41, 0x6c, 0x6c, 0x54, + 0x68, 0x65, 0x54, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x53, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x4d, 0x61, + 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x4d, 0x61, + 0x70, 0x12, 0x3d, 0x0a, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x5f, 0x6d, 0x61, 0x70, + 0x18, 0x14, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x41, 0x6c, 0x6c, 0x54, 0x68, 0x65, 0x54, + 0x79, 0x70, 0x65, 0x73, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x4d, 0x61, 0x70, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x4d, 0x61, 0x70, + 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x5f, 0x6c, 0x69, 0x73, 0x74, 0x18, + 0x15, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x4c, 0x69, 0x73, + 0x74, 0x12, 0x32, 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x5f, 0x6c, 0x69, 0x73, + 0x74, 0x18, 0x16, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, + 0x65, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, + 0x78, 0x4c, 0x69, 0x73, 0x74, 0x1a, 0x3c, 0x0a, 0x0e, 0x53, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x4d, + 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x1a, 0x4e, 0x0a, 0x0f, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x4d, 0x61, + 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x25, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, + 0x65, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x22, 0x29, 0x0a, 0x0b, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x45, 0x6e, + 0x75, 0x6d, 0x12, 0x0c, 0x0a, 0x08, 0x4f, 0x50, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x30, 0x10, 0x00, + 0x12, 0x0c, 0x0a, 0x08, 0x4f, 0x50, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x31, 0x10, 0x01, 0x42, 0x07, + 0x0a, 0x05, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x42, 0x11, 0x5a, 0x0f, 0x2e, 0x2e, 0x2f, 0x75, 0x74, + 0x69, 0x6c, 0x5f, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, +} + +var ( + file_messages_types_proto_rawDescOnce sync.Once + file_messages_types_proto_rawDescData = file_messages_types_proto_rawDesc +) + +func file_messages_types_proto_rawDescGZIP() []byte { + file_messages_types_proto_rawDescOnce.Do(func() { + file_messages_types_proto_rawDescData = protoimpl.X.CompressGZIP(file_messages_types_proto_rawDescData) + }) + return file_messages_types_proto_rawDescData +} + +var file_messages_types_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_messages_types_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_messages_types_proto_goTypes = []interface{}{ + (AllTheTypes_ExampleEnum)(0), // 0: AllTheTypes.ExampleEnum + (*ExampleMessage)(nil), // 1: ExampleMessage + (*AllTheTypes)(nil), // 2: AllTheTypes + nil, // 3: AllTheTypes.SimpleMapEntry + nil, // 4: AllTheTypes.ComplexMapEntry + (*anypb.Any)(nil), // 5: google.protobuf.Any +} +var file_messages_types_proto_depIdxs = []int32{ + 0, // 0: AllTheTypes.enum:type_name -> 
AllTheTypes.ExampleEnum + 1, // 1: AllTheTypes.message:type_name -> ExampleMessage + 1, // 2: AllTheTypes.oneofmessage:type_name -> ExampleMessage + 5, // 3: AllTheTypes.any:type_name -> google.protobuf.Any + 3, // 4: AllTheTypes.simple_map:type_name -> AllTheTypes.SimpleMapEntry + 4, // 5: AllTheTypes.complex_map:type_name -> AllTheTypes.ComplexMapEntry + 1, // 6: AllTheTypes.complex_list:type_name -> ExampleMessage + 1, // 7: AllTheTypes.ComplexMapEntry.value:type_name -> ExampleMessage + 8, // [8:8] is the sub-list for method output_type + 8, // [8:8] is the sub-list for method input_type + 8, // [8:8] is the sub-list for extension type_name + 8, // [8:8] is the sub-list for extension extendee + 0, // [0:8] is the sub-list for field type_name +} + +func init() { file_messages_types_proto_init() } +func file_messages_types_proto_init() { + if File_messages_types_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_messages_types_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ExampleMessage); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_messages_types_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*AllTheTypes); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_messages_types_proto_msgTypes[1].OneofWrappers = []interface{}{ + (*AllTheTypes_Oneofstring)(nil), + (*AllTheTypes_Oneofmessage)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_messages_types_proto_rawDesc, + NumEnums: 1, + NumMessages: 4, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_messages_types_proto_goTypes, + DependencyIndexes: file_messages_types_proto_depIdxs, + EnumInfos: file_messages_types_proto_enumTypes, + MessageInfos: file_messages_types_proto_msgTypes, + }.Build() + File_messages_types_proto = out.File + file_messages_types_proto_rawDesc = nil + file_messages_types_proto_goTypes = nil + file_messages_types_proto_depIdxs = nil +} diff --git a/go/go.mod b/go/go.mod index 972940ee3c299..b6fccf6735254 100644 --- a/go/go.mod +++ b/go/go.mod @@ -21,9 +21,9 @@ go 1.21 require ( github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c github.com/andybalholm/brotli v1.1.0 - github.com/apache/thrift v0.19.0 + github.com/apache/thrift v0.20.0 github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 - github.com/goccy/go-json v0.10.2 + github.com/goccy/go-json v0.10.3 github.com/golang/snappy v0.0.4 github.com/google/flatbuffers v24.3.25+incompatible github.com/klauspost/asmfmt v1.3.2 @@ -36,18 +36,20 @@ require ( github.com/zeebo/xxh3 v1.0.2 golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/sync v0.7.0 - golang.org/x/sys v0.19.0 - golang.org/x/tools v0.20.0 + golang.org/x/sys v0.20.0 + golang.org/x/tools v0.21.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 gonum.org/v1/gonum v0.15.0 google.golang.org/grpc v1.63.2 - google.golang.org/protobuf v1.33.0 + google.golang.org/protobuf v1.34.1 modernc.org/sqlite v1.29.6 ) require ( + github.com/golang/protobuf v1.5.4 github.com/google/uuid v1.6.0 - github.com/hamba/avro/v2 v2.20.1 + github.com/huandu/xstrings v1.4.0 + github.com/hamba/avro/v2 v2.22.1 github.com/substrait-io/substrait-go v0.4.2 github.com/tidwall/sjson v1.2.5 ) @@ -75,8 +77,8 
@@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/net v0.24.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/text v0.15.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect gopkg.in/yaml.v3 v3.0.1 // indirect modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect diff --git a/go/go.sum b/go/go.sum index 0a45cb751f77e..d963493108d86 100644 --- a/go/go.sum +++ b/go/go.sum @@ -8,8 +8,8 @@ github.com/alecthomas/repr v0.2.0 h1:HAzS41CIzNW5syS8Mf9UwXhNH1J9aix/BvDRf1Ml2Yk github.com/alecthomas/repr v0.2.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= -github.com/apache/thrift v0.19.0 h1:sOqkWPzMj7w6XaYbJQG7m4sGqVolaW/0D28Ln7yPzMk= -github.com/apache/thrift v0.19.0/go.mod h1:SUALL216IiaOw2Oy+5Vs9lboJ/t9g40C+G07Dc0QC1I= +github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= +github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -26,8 +26,8 @@ github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD87 github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= github.com/go-playground/validator/v10 v10.4.1 h1:pH2c5ADXtd66mxoE0Zm9SUhxE20r7aM3F26W0hOn+GE= github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= -github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= +github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/goccy/go-yaml v1.11.0 h1:n7Z+zx8S9f9KgzG6KtQKf+kwqXZlLNR2F6018Dgau54= github.com/goccy/go-yaml v1.11.0/go.mod h1:H+mJrWtjPTJAHvRbV09MCK9xYwODM+wRTVFFTWckfng= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= @@ -43,12 +43,14 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hamba/avro/v2 v2.20.1 h1:3WByQiVn7wT7d27WQq6pvBRC00FVOrniP6u67FLA/2E= -github.com/hamba/avro/v2 v2.20.1/go.mod h1:xHiKXbISpb3Ovc809XdzWow+XGTn+Oyf/F9aZbTLAig= +github.com/hamba/avro/v2 v2.22.1 h1:q1rAbfJsrbMaZPDLQvwUQMfQzp6H+hGXvckmU/lXemk= +github.com/hamba/avro/v2 v2.22.1/go.mod h1:HOeTrE3kvWnBAgsufqhAzDDV5gvS0QXs65Z6BHfGgbg= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= 
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= +github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU= +github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= @@ -111,25 +113,25 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= -golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.20.0 h1:hz/CVckiOxybQvFw6h7b/q80NTr9IUQb4s1IIzW7KNY= -golang.org/x/tools v0.20.0/go.mod h1:WvitBU7JJf6A4jOdg4S1tviW9bhUxkgeCui/0JHctQg= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= @@ -138,8 +140,8 @@ 
google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1: google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/go/internal/utils/min_max_arm64.go b/go/internal/utils/min_max_arm64.go index 7404e95d963e3..d02849665df56 100644 --- a/go/internal/utils/min_max_arm64.go +++ b/go/internal/utils/min_max_arm64.go @@ -21,8 +21,9 @@ package utils import ( "os" "strings" + + "golang.org/x/sys/cpu" ) -import "golang.org/x/sys/cpu" func init() { // Added ability to enable extension via environment: diff --git a/go/parquet/doc.go b/go/parquet/doc.go index ff42be6498d8c..6ab08f83f063f 100644 --- a/go/parquet/doc.go +++ b/go/parquet/doc.go @@ -26,14 +26,15 @@ // This implementation is a native go implementation for reading and writing the // parquet file format. // -// Install +// # Install // // You can download the library and cli utilities via: -// go get -u github.com/apache/arrow/go/v17/parquet -// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_reader@latest -// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_schema@latest // -// Modules +// go get -u github.com/apache/arrow/go/v17/parquet +// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_reader@latest +// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_schema@latest +// +// # Modules // // This top level parquet package contains the basic common types and reader/writer // properties along with some utilities that are used throughout the other modules. @@ -50,13 +51,13 @@ // The schema module contains the types for manipulating / inspecting / creating // parquet file schemas. // -// Primitive Types +// # Primitive Types // // The Parquet Primitive Types and their corresponding Go types are Boolean (bool), // Int32 (int32), Int64 (int64), Int96 (parquet.Int96), Float (float32), Double (float64), // ByteArray (parquet.ByteArray) and FixedLenByteArray (parquet.FixedLenByteArray). // -// Encodings +// # Encodings // // The encoding types supported in this package are: // Plain, Plain/RLE Dictionary, Delta Binary Packed (only integer types), Delta Byte Array diff --git a/go/parquet/internal/bmi/bitmap_bmi2_amd64.go b/go/parquet/internal/bmi/bitmap_bmi2_amd64.go index ab6dcec40b02b..7fe5a1654911e 100644 --- a/go/parquet/internal/bmi/bitmap_bmi2_amd64.go +++ b/go/parquet/internal/bmi/bitmap_bmi2_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
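The doc.go hunk above tracks the Go 1.19 doc-comment conventions that gofmt now enforces: a leading "# " renders as a section heading, code samples live in their own indented paragraph, and blank "//" lines separate blocks. A minimal sketch of the convention (package name and module path here are hypothetical):

// Package sketch illustrates Go 1.19+ doc-comment formatting.
//
// # Install
//
// Fetch the module with:
//
//	go get example.com/hypothetical/sketch
//
// The indented line above renders as a code block on pkg.go.dev, and
// "# Install" renders as a heading; gofmt rewrites older comments
// (like the parquet doc.go above) into this shape.
package sketch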
+//go:build !noasm // +build !noasm package bmi diff --git a/go/parquet/internal/bmi/bitmap_bmi2_noasm.go b/go/parquet/internal/bmi/bitmap_bmi2_noasm.go index 6dc4a39a60e5a..03be648e011a7 100644 --- a/go/parquet/internal/bmi/bitmap_bmi2_noasm.go +++ b/go/parquet/internal/bmi/bitmap_bmi2_noasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build noasm // +build noasm package bmi diff --git a/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go b/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go index 498d5452e17ad..60f898f6bd557 100644 --- a/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go +++ b/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package bmi diff --git a/go/parquet/internal/bmi/bitmap_bmi2_s390x.go b/go/parquet/internal/bmi/bitmap_bmi2_s390x.go index 498d5452e17ad..60f898f6bd557 100644 --- a/go/parquet/internal/bmi/bitmap_bmi2_s390x.go +++ b/go/parquet/internal/bmi/bitmap_bmi2_s390x.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package bmi diff --git a/go/parquet/internal/bmi/bmi_amd64.go b/go/parquet/internal/bmi/bmi_amd64.go index 600ef024f69a8..f894b160d4c8b 100644 --- a/go/parquet/internal/bmi/bmi_amd64.go +++ b/go/parquet/internal/bmi/bmi_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package bmi diff --git a/go/parquet/internal/debug/assert_off.go b/go/parquet/internal/debug/assert_off.go index 52b9a233169d2..1450ecc98a26e 100644 --- a/go/parquet/internal/debug/assert_off.go +++ b/go/parquet/internal/debug/assert_off.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !assert // +build !assert package debug diff --git a/go/parquet/internal/debug/assert_on.go b/go/parquet/internal/debug/assert_on.go index 188e683120466..1a47460fd542a 100644 --- a/go/parquet/internal/debug/assert_on.go +++ b/go/parquet/internal/debug/assert_on.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build assert // +build assert package debug diff --git a/go/parquet/internal/debug/doc.go b/go/parquet/internal/debug/doc.go index 61684d625380d..d3965793a0825 100644 --- a/go/parquet/internal/debug/doc.go +++ b/go/parquet/internal/debug/doc.go @@ -16,7 +16,7 @@ // Package debug provides APIs for conditional runtime assertions and debug logging. // -// Using Assert +// # Using Assert // // To enable runtime assertions, build with the assert tag. When the assert tag is omitted, // the code for the assertion will be omitted from the binary. diff --git a/go/parquet/internal/debug/log_off.go b/go/parquet/internal/debug/log_off.go index 23dcccd810ce4..09f0e09a5ed1d 100644 --- a/go/parquet/internal/debug/log_off.go +++ b/go/parquet/internal/debug/log_off.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
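The "//go:build" lines added throughout these files are the Go 1.17+ constraint syntax; gofmt keeps them in sync with the legacy "// +build" form, and the two must agree while the old form is retained. A minimal sketch of the pattern these parquet files use to pair an assembly-backed implementation with a "noasm" fallback (file and package names are hypothetical):

// fast_noasm_off.go (hypothetical): compiled by default.
//
//go:build !noasm
// +build !noasm

package simd

// useAsm reports whether the assembly kernels are compiled in.
func useAsm() bool { return true }

// fast_noasm_on.go (hypothetical): selected with `go build -tags noasm`.
//
//go:build noasm
// +build noasm

package simd

// useAsm reports whether the assembly kernels are compiled in.
func useAsm() bool { return false }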
+//go:build !debug // +build !debug package debug diff --git a/go/parquet/internal/debug/log_on.go b/go/parquet/internal/debug/log_on.go index 8d6106099f6f0..0067e442d3693 100644 --- a/go/parquet/internal/debug/log_on.go +++ b/go/parquet/internal/debug/log_on.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build debug // +build debug package debug diff --git a/go/parquet/internal/encoding/delta_byte_array_test.go b/go/parquet/internal/encoding/delta_byte_array_test.go index 1e5e6b2d676ef..c2e4e6849396e 100644 --- a/go/parquet/internal/encoding/delta_byte_array_test.go +++ b/go/parquet/internal/encoding/delta_byte_array_test.go @@ -18,10 +18,11 @@ package encoding import ( "fmt" + "testing" + "github.com/apache/arrow/go/v17/arrow/memory" "github.com/apache/arrow/go/v17/parquet" "github.com/stretchr/testify/assert" - "testing" ) func TestDeltaByteArrayDecoder_SetData(t *testing.T) { diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go index 51f48c797488f..2d7a5d6b1d166 100644 --- a/go/parquet/internal/encoding/types.go +++ b/go/parquet/internal/encoding/types.go @@ -185,7 +185,7 @@ func (b *PooledBufferWriter) Reserve(nbytes int) { b.buf = bufferPool.Get().(*memory.Buffer) } - newCap := utils.Max(b.buf.Cap()+b.offset, 256) + newCap := utils.Max(b.buf.Cap(), 256) for newCap < b.pos+nbytes { newCap = bitutil.NextPowerOf2(newCap) } @@ -361,11 +361,16 @@ func (b *BufferWriter) Truncate() { func (b *BufferWriter) Reset(initial int) { if b.buffer != nil { b.buffer.Release() + } else { + b.buffer = memory.NewResizableBuffer(b.mem) } b.pos = 0 b.offset = 0 - b.Reserve(initial) + + if initial > 0 { + b.Reserve(initial) + } } // Reserve ensures that there is at least enough capacity to write nbytes diff --git a/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go b/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go index 01f1eb5aa99e3..c2a8e5415ed64 100644 --- a/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go +++ b/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go @@ -2,5 +2,4 @@ package parquet -var GoUnusedProtection__ int; - +var GoUnusedProtection__ int diff --git a/go/parquet/internal/gen-go/parquet/parquet-consts.go b/go/parquet/internal/gen-go/parquet/parquet-consts.go index ab0a73c596e7d..f83e0be7640ff 100644 --- a/go/parquet/internal/gen-go/parquet/parquet-consts.go +++ b/go/parquet/internal/gen-go/parquet/parquet-consts.go @@ -7,10 +7,11 @@ import ( "context" "errors" "fmt" + "regexp" + "strings" "time" + thrift "github.com/apache/thrift/lib/go/thrift" - "strings" - "regexp" ) // (needed to ensure safety because of naive import list construction.) @@ -20,11 +21,10 @@ var _ = errors.New var _ = context.Background var _ = time.Now var _ = bytes.Equal + // (needed by validator.) var _ = strings.Contains var _ = regexp.MatchString - func init() { } - diff --git a/go/parquet/internal/gen-go/parquet/parquet.go b/go/parquet/internal/gen-go/parquet/parquet.go index 9dcedae8888d3..5b616d1335150 100644 --- a/go/parquet/internal/gen-go/parquet/parquet.go +++ b/go/parquet/internal/gen-go/parquet/parquet.go @@ -8,10 +8,11 @@ import ( "database/sql/driver" "errors" "fmt" + "regexp" + "strings" "time" + thrift "github.com/apache/thrift/lib/go/thrift" - "strings" - "regexp" ) // (needed to ensure safety because of naive import list construction.) 
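Unlike the surrounding formatting-only hunks, the encoding/types.go change above alters behavior: Reserve now grows from the buffer's existing capacity alone (dropping the "+ b.offset" term) and doubles toward the next power of two until pos+nbytes fits, and Reset lazily allocates a buffer and skips reservation when initial is zero. A runnable sketch of that growth rule, with illustrative names; "*= 2" stands in for bitutil.NextPowerOf2, which behaves the same once the capacity is already a power of two:

package main

import "fmt"

// reserveCap is an illustrative stand-in for the capacity rule in
// PooledBufferWriter.Reserve: start from max(current capacity, 256),
// then double until pos+nbytes fits.
func reserveCap(current, pos, nbytes int) int {
	newCap := current
	if newCap < 256 {
		newCap = 256
	}
	for newCap < pos+nbytes {
		newCap *= 2
	}
	return newCap
}

func main() {
	fmt.Println(reserveCap(0, 0, 100))     // 256: the floor covers small writes
	fmt.Println(reserveCap(256, 500, 100)) // 1024: doubled twice to fit 600 bytes
}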
@@ -21,1336 +22,1547 @@ var _ = errors.New var _ = context.Background var _ = time.Now var _ = bytes.Equal + // (needed by validator.) var _ = strings.Contains var _ = regexp.MatchString -//Types supported by Parquet. These types are intended to be used in combination -//with the encodings to control the on disk storage format. -//For example INT16 is not included as a type since a good encoding of INT32 -//would handle this. +// Types supported by Parquet. These types are intended to be used in combination +// with the encodings to control the on disk storage format. +// For example INT16 is not included as a type since a good encoding of INT32 +// would handle this. type Type int64 + const ( - Type_BOOLEAN Type = 0 - Type_INT32 Type = 1 - Type_INT64 Type = 2 - Type_INT96 Type = 3 - Type_FLOAT Type = 4 - Type_DOUBLE Type = 5 - Type_BYTE_ARRAY Type = 6 - Type_FIXED_LEN_BYTE_ARRAY Type = 7 + Type_BOOLEAN Type = 0 + Type_INT32 Type = 1 + Type_INT64 Type = 2 + Type_INT96 Type = 3 + Type_FLOAT Type = 4 + Type_DOUBLE Type = 5 + Type_BYTE_ARRAY Type = 6 + Type_FIXED_LEN_BYTE_ARRAY Type = 7 ) func (p Type) String() string { - switch p { - case Type_BOOLEAN: return "BOOLEAN" - case Type_INT32: return "INT32" - case Type_INT64: return "INT64" - case Type_INT96: return "INT96" - case Type_FLOAT: return "FLOAT" - case Type_DOUBLE: return "DOUBLE" - case Type_BYTE_ARRAY: return "BYTE_ARRAY" - case Type_FIXED_LEN_BYTE_ARRAY: return "FIXED_LEN_BYTE_ARRAY" - } - return "" + switch p { + case Type_BOOLEAN: + return "BOOLEAN" + case Type_INT32: + return "INT32" + case Type_INT64: + return "INT64" + case Type_INT96: + return "INT96" + case Type_FLOAT: + return "FLOAT" + case Type_DOUBLE: + return "DOUBLE" + case Type_BYTE_ARRAY: + return "BYTE_ARRAY" + case Type_FIXED_LEN_BYTE_ARRAY: + return "FIXED_LEN_BYTE_ARRAY" + } + return "" } func TypeFromString(s string) (Type, error) { - switch s { - case "BOOLEAN": return Type_BOOLEAN, nil - case "INT32": return Type_INT32, nil - case "INT64": return Type_INT64, nil - case "INT96": return Type_INT96, nil - case "FLOAT": return Type_FLOAT, nil - case "DOUBLE": return Type_DOUBLE, nil - case "BYTE_ARRAY": return Type_BYTE_ARRAY, nil - case "FIXED_LEN_BYTE_ARRAY": return Type_FIXED_LEN_BYTE_ARRAY, nil - } - return Type(0), fmt.Errorf("not a valid Type string") + switch s { + case "BOOLEAN": + return Type_BOOLEAN, nil + case "INT32": + return Type_INT32, nil + case "INT64": + return Type_INT64, nil + case "INT96": + return Type_INT96, nil + case "FLOAT": + return Type_FLOAT, nil + case "DOUBLE": + return Type_DOUBLE, nil + case "BYTE_ARRAY": + return Type_BYTE_ARRAY, nil + case "FIXED_LEN_BYTE_ARRAY": + return Type_FIXED_LEN_BYTE_ARRAY, nil + } + return Type(0), fmt.Errorf("not a valid Type string") } - func TypePtr(v Type) *Type { return &v } func (p Type) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *Type) UnmarshalText(text []byte) error { -q, err := TypeFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := TypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *Type) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = Type(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = Type(v) + return nil } -func (p * Type) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), 
nil +func (p *Type) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } -//DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. -//ConvertedType is superseded by LogicalType. This enum should not be extended. + +// DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. +// ConvertedType is superseded by LogicalType. This enum should not be extended. // -//See LogicalTypes.md for conversion between ConvertedType and LogicalType. +// See LogicalTypes.md for conversion between ConvertedType and LogicalType. type ConvertedType int64 + const ( - ConvertedType_UTF8 ConvertedType = 0 - ConvertedType_MAP ConvertedType = 1 - ConvertedType_MAP_KEY_VALUE ConvertedType = 2 - ConvertedType_LIST ConvertedType = 3 - ConvertedType_ENUM ConvertedType = 4 - ConvertedType_DECIMAL ConvertedType = 5 - ConvertedType_DATE ConvertedType = 6 - ConvertedType_TIME_MILLIS ConvertedType = 7 - ConvertedType_TIME_MICROS ConvertedType = 8 - ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9 - ConvertedType_TIMESTAMP_MICROS ConvertedType = 10 - ConvertedType_UINT_8 ConvertedType = 11 - ConvertedType_UINT_16 ConvertedType = 12 - ConvertedType_UINT_32 ConvertedType = 13 - ConvertedType_UINT_64 ConvertedType = 14 - ConvertedType_INT_8 ConvertedType = 15 - ConvertedType_INT_16 ConvertedType = 16 - ConvertedType_INT_32 ConvertedType = 17 - ConvertedType_INT_64 ConvertedType = 18 - ConvertedType_JSON ConvertedType = 19 - ConvertedType_BSON ConvertedType = 20 - ConvertedType_INTERVAL ConvertedType = 21 + ConvertedType_UTF8 ConvertedType = 0 + ConvertedType_MAP ConvertedType = 1 + ConvertedType_MAP_KEY_VALUE ConvertedType = 2 + ConvertedType_LIST ConvertedType = 3 + ConvertedType_ENUM ConvertedType = 4 + ConvertedType_DECIMAL ConvertedType = 5 + ConvertedType_DATE ConvertedType = 6 + ConvertedType_TIME_MILLIS ConvertedType = 7 + ConvertedType_TIME_MICROS ConvertedType = 8 + ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9 + ConvertedType_TIMESTAMP_MICROS ConvertedType = 10 + ConvertedType_UINT_8 ConvertedType = 11 + ConvertedType_UINT_16 ConvertedType = 12 + ConvertedType_UINT_32 ConvertedType = 13 + ConvertedType_UINT_64 ConvertedType = 14 + ConvertedType_INT_8 ConvertedType = 15 + ConvertedType_INT_16 ConvertedType = 16 + ConvertedType_INT_32 ConvertedType = 17 + ConvertedType_INT_64 ConvertedType = 18 + ConvertedType_JSON ConvertedType = 19 + ConvertedType_BSON ConvertedType = 20 + ConvertedType_INTERVAL ConvertedType = 21 ) func (p ConvertedType) String() string { - switch p { - case ConvertedType_UTF8: return "UTF8" - case ConvertedType_MAP: return "MAP" - case ConvertedType_MAP_KEY_VALUE: return "MAP_KEY_VALUE" - case ConvertedType_LIST: return "LIST" - case ConvertedType_ENUM: return "ENUM" - case ConvertedType_DECIMAL: return "DECIMAL" - case ConvertedType_DATE: return "DATE" - case ConvertedType_TIME_MILLIS: return "TIME_MILLIS" - case ConvertedType_TIME_MICROS: return "TIME_MICROS" - case ConvertedType_TIMESTAMP_MILLIS: return "TIMESTAMP_MILLIS" - case ConvertedType_TIMESTAMP_MICROS: return "TIMESTAMP_MICROS" - case ConvertedType_UINT_8: return "UINT_8" - case ConvertedType_UINT_16: return "UINT_16" - case ConvertedType_UINT_32: return "UINT_32" - case ConvertedType_UINT_64: return "UINT_64" - case ConvertedType_INT_8: return "INT_8" - case ConvertedType_INT_16: return "INT_16" - case ConvertedType_INT_32: return "INT_32" - case ConvertedType_INT_64: return "INT_64" - case ConvertedType_JSON: return "JSON" - case ConvertedType_BSON: 
return "BSON" - case ConvertedType_INTERVAL: return "INTERVAL" - } - return "" + switch p { + case ConvertedType_UTF8: + return "UTF8" + case ConvertedType_MAP: + return "MAP" + case ConvertedType_MAP_KEY_VALUE: + return "MAP_KEY_VALUE" + case ConvertedType_LIST: + return "LIST" + case ConvertedType_ENUM: + return "ENUM" + case ConvertedType_DECIMAL: + return "DECIMAL" + case ConvertedType_DATE: + return "DATE" + case ConvertedType_TIME_MILLIS: + return "TIME_MILLIS" + case ConvertedType_TIME_MICROS: + return "TIME_MICROS" + case ConvertedType_TIMESTAMP_MILLIS: + return "TIMESTAMP_MILLIS" + case ConvertedType_TIMESTAMP_MICROS: + return "TIMESTAMP_MICROS" + case ConvertedType_UINT_8: + return "UINT_8" + case ConvertedType_UINT_16: + return "UINT_16" + case ConvertedType_UINT_32: + return "UINT_32" + case ConvertedType_UINT_64: + return "UINT_64" + case ConvertedType_INT_8: + return "INT_8" + case ConvertedType_INT_16: + return "INT_16" + case ConvertedType_INT_32: + return "INT_32" + case ConvertedType_INT_64: + return "INT_64" + case ConvertedType_JSON: + return "JSON" + case ConvertedType_BSON: + return "BSON" + case ConvertedType_INTERVAL: + return "INTERVAL" + } + return "" } func ConvertedTypeFromString(s string) (ConvertedType, error) { - switch s { - case "UTF8": return ConvertedType_UTF8, nil - case "MAP": return ConvertedType_MAP, nil - case "MAP_KEY_VALUE": return ConvertedType_MAP_KEY_VALUE, nil - case "LIST": return ConvertedType_LIST, nil - case "ENUM": return ConvertedType_ENUM, nil - case "DECIMAL": return ConvertedType_DECIMAL, nil - case "DATE": return ConvertedType_DATE, nil - case "TIME_MILLIS": return ConvertedType_TIME_MILLIS, nil - case "TIME_MICROS": return ConvertedType_TIME_MICROS, nil - case "TIMESTAMP_MILLIS": return ConvertedType_TIMESTAMP_MILLIS, nil - case "TIMESTAMP_MICROS": return ConvertedType_TIMESTAMP_MICROS, nil - case "UINT_8": return ConvertedType_UINT_8, nil - case "UINT_16": return ConvertedType_UINT_16, nil - case "UINT_32": return ConvertedType_UINT_32, nil - case "UINT_64": return ConvertedType_UINT_64, nil - case "INT_8": return ConvertedType_INT_8, nil - case "INT_16": return ConvertedType_INT_16, nil - case "INT_32": return ConvertedType_INT_32, nil - case "INT_64": return ConvertedType_INT_64, nil - case "JSON": return ConvertedType_JSON, nil - case "BSON": return ConvertedType_BSON, nil - case "INTERVAL": return ConvertedType_INTERVAL, nil - } - return ConvertedType(0), fmt.Errorf("not a valid ConvertedType string") + switch s { + case "UTF8": + return ConvertedType_UTF8, nil + case "MAP": + return ConvertedType_MAP, nil + case "MAP_KEY_VALUE": + return ConvertedType_MAP_KEY_VALUE, nil + case "LIST": + return ConvertedType_LIST, nil + case "ENUM": + return ConvertedType_ENUM, nil + case "DECIMAL": + return ConvertedType_DECIMAL, nil + case "DATE": + return ConvertedType_DATE, nil + case "TIME_MILLIS": + return ConvertedType_TIME_MILLIS, nil + case "TIME_MICROS": + return ConvertedType_TIME_MICROS, nil + case "TIMESTAMP_MILLIS": + return ConvertedType_TIMESTAMP_MILLIS, nil + case "TIMESTAMP_MICROS": + return ConvertedType_TIMESTAMP_MICROS, nil + case "UINT_8": + return ConvertedType_UINT_8, nil + case "UINT_16": + return ConvertedType_UINT_16, nil + case "UINT_32": + return ConvertedType_UINT_32, nil + case "UINT_64": + return ConvertedType_UINT_64, nil + case "INT_8": + return ConvertedType_INT_8, nil + case "INT_16": + return ConvertedType_INT_16, nil + case "INT_32": + return ConvertedType_INT_32, nil + case "INT_64": + return 
ConvertedType_INT_64, nil + case "JSON": + return ConvertedType_JSON, nil + case "BSON": + return ConvertedType_BSON, nil + case "INTERVAL": + return ConvertedType_INTERVAL, nil + } + return ConvertedType(0), fmt.Errorf("not a valid ConvertedType string") } - func ConvertedTypePtr(v ConvertedType) *ConvertedType { return &v } func (p ConvertedType) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *ConvertedType) UnmarshalText(text []byte) error { -q, err := ConvertedTypeFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := ConvertedTypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *ConvertedType) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = ConvertedType(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = ConvertedType(v) + return nil } -func (p * ConvertedType) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *ConvertedType) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } -//Representation of Schemas + +// Representation of Schemas type FieldRepetitionType int64 + const ( - FieldRepetitionType_REQUIRED FieldRepetitionType = 0 - FieldRepetitionType_OPTIONAL FieldRepetitionType = 1 - FieldRepetitionType_REPEATED FieldRepetitionType = 2 + FieldRepetitionType_REQUIRED FieldRepetitionType = 0 + FieldRepetitionType_OPTIONAL FieldRepetitionType = 1 + FieldRepetitionType_REPEATED FieldRepetitionType = 2 ) func (p FieldRepetitionType) String() string { - switch p { - case FieldRepetitionType_REQUIRED: return "REQUIRED" - case FieldRepetitionType_OPTIONAL: return "OPTIONAL" - case FieldRepetitionType_REPEATED: return "REPEATED" - } - return "" + switch p { + case FieldRepetitionType_REQUIRED: + return "REQUIRED" + case FieldRepetitionType_OPTIONAL: + return "OPTIONAL" + case FieldRepetitionType_REPEATED: + return "REPEATED" + } + return "" } func FieldRepetitionTypeFromString(s string) (FieldRepetitionType, error) { - switch s { - case "REQUIRED": return FieldRepetitionType_REQUIRED, nil - case "OPTIONAL": return FieldRepetitionType_OPTIONAL, nil - case "REPEATED": return FieldRepetitionType_REPEATED, nil - } - return FieldRepetitionType(0), fmt.Errorf("not a valid FieldRepetitionType string") + switch s { + case "REQUIRED": + return FieldRepetitionType_REQUIRED, nil + case "OPTIONAL": + return FieldRepetitionType_OPTIONAL, nil + case "REPEATED": + return FieldRepetitionType_REPEATED, nil + } + return FieldRepetitionType(0), fmt.Errorf("not a valid FieldRepetitionType string") } - func FieldRepetitionTypePtr(v FieldRepetitionType) *FieldRepetitionType { return &v } func (p FieldRepetitionType) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *FieldRepetitionType) UnmarshalText(text []byte) error { -q, err := FieldRepetitionTypeFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := FieldRepetitionTypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *FieldRepetitionType) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = FieldRepetitionType(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not 
int64") + } + *p = FieldRepetitionType(v) + return nil } -func (p * FieldRepetitionType) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *FieldRepetitionType) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } -//Encodings supported by Parquet. Not all encodings are valid for all types. These -//enums are also used to specify the encoding of definition and repetition levels. -//See the accompanying doc for the details of the more complicated encodings. + +// Encodings supported by Parquet. Not all encodings are valid for all types. These +// enums are also used to specify the encoding of definition and repetition levels. +// See the accompanying doc for the details of the more complicated encodings. type Encoding int64 + const ( - Encoding_PLAIN Encoding = 0 - Encoding_PLAIN_DICTIONARY Encoding = 2 - Encoding_RLE Encoding = 3 - Encoding_BIT_PACKED Encoding = 4 - Encoding_DELTA_BINARY_PACKED Encoding = 5 - Encoding_DELTA_LENGTH_BYTE_ARRAY Encoding = 6 - Encoding_DELTA_BYTE_ARRAY Encoding = 7 - Encoding_RLE_DICTIONARY Encoding = 8 - Encoding_BYTE_STREAM_SPLIT Encoding = 9 + Encoding_PLAIN Encoding = 0 + Encoding_PLAIN_DICTIONARY Encoding = 2 + Encoding_RLE Encoding = 3 + Encoding_BIT_PACKED Encoding = 4 + Encoding_DELTA_BINARY_PACKED Encoding = 5 + Encoding_DELTA_LENGTH_BYTE_ARRAY Encoding = 6 + Encoding_DELTA_BYTE_ARRAY Encoding = 7 + Encoding_RLE_DICTIONARY Encoding = 8 + Encoding_BYTE_STREAM_SPLIT Encoding = 9 ) func (p Encoding) String() string { - switch p { - case Encoding_PLAIN: return "PLAIN" - case Encoding_PLAIN_DICTIONARY: return "PLAIN_DICTIONARY" - case Encoding_RLE: return "RLE" - case Encoding_BIT_PACKED: return "BIT_PACKED" - case Encoding_DELTA_BINARY_PACKED: return "DELTA_BINARY_PACKED" - case Encoding_DELTA_LENGTH_BYTE_ARRAY: return "DELTA_LENGTH_BYTE_ARRAY" - case Encoding_DELTA_BYTE_ARRAY: return "DELTA_BYTE_ARRAY" - case Encoding_RLE_DICTIONARY: return "RLE_DICTIONARY" - case Encoding_BYTE_STREAM_SPLIT: return "BYTE_STREAM_SPLIT" - } - return "" + switch p { + case Encoding_PLAIN: + return "PLAIN" + case Encoding_PLAIN_DICTIONARY: + return "PLAIN_DICTIONARY" + case Encoding_RLE: + return "RLE" + case Encoding_BIT_PACKED: + return "BIT_PACKED" + case Encoding_DELTA_BINARY_PACKED: + return "DELTA_BINARY_PACKED" + case Encoding_DELTA_LENGTH_BYTE_ARRAY: + return "DELTA_LENGTH_BYTE_ARRAY" + case Encoding_DELTA_BYTE_ARRAY: + return "DELTA_BYTE_ARRAY" + case Encoding_RLE_DICTIONARY: + return "RLE_DICTIONARY" + case Encoding_BYTE_STREAM_SPLIT: + return "BYTE_STREAM_SPLIT" + } + return "" } func EncodingFromString(s string) (Encoding, error) { - switch s { - case "PLAIN": return Encoding_PLAIN, nil - case "PLAIN_DICTIONARY": return Encoding_PLAIN_DICTIONARY, nil - case "RLE": return Encoding_RLE, nil - case "BIT_PACKED": return Encoding_BIT_PACKED, nil - case "DELTA_BINARY_PACKED": return Encoding_DELTA_BINARY_PACKED, nil - case "DELTA_LENGTH_BYTE_ARRAY": return Encoding_DELTA_LENGTH_BYTE_ARRAY, nil - case "DELTA_BYTE_ARRAY": return Encoding_DELTA_BYTE_ARRAY, nil - case "RLE_DICTIONARY": return Encoding_RLE_DICTIONARY, nil - case "BYTE_STREAM_SPLIT": return Encoding_BYTE_STREAM_SPLIT, nil - } - return Encoding(0), fmt.Errorf("not a valid Encoding string") + switch s { + case "PLAIN": + return Encoding_PLAIN, nil + case "PLAIN_DICTIONARY": + return Encoding_PLAIN_DICTIONARY, nil + case "RLE": + return Encoding_RLE, nil + case "BIT_PACKED": + return Encoding_BIT_PACKED, nil + case 
"DELTA_BINARY_PACKED": + return Encoding_DELTA_BINARY_PACKED, nil + case "DELTA_LENGTH_BYTE_ARRAY": + return Encoding_DELTA_LENGTH_BYTE_ARRAY, nil + case "DELTA_BYTE_ARRAY": + return Encoding_DELTA_BYTE_ARRAY, nil + case "RLE_DICTIONARY": + return Encoding_RLE_DICTIONARY, nil + case "BYTE_STREAM_SPLIT": + return Encoding_BYTE_STREAM_SPLIT, nil + } + return Encoding(0), fmt.Errorf("not a valid Encoding string") } - func EncodingPtr(v Encoding) *Encoding { return &v } func (p Encoding) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *Encoding) UnmarshalText(text []byte) error { -q, err := EncodingFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := EncodingFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *Encoding) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = Encoding(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = Encoding(v) + return nil } -func (p * Encoding) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *Encoding) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } -//Supported compression algorithms. + +// Supported compression algorithms. // -//Codecs added in format version X.Y can be read by readers based on X.Y and later. -//Codec support may vary between readers based on the format version and -//libraries available at runtime. +// Codecs added in format version X.Y can be read by readers based on X.Y and later. +// Codec support may vary between readers based on the format version and +// libraries available at runtime. // -//See Compression.md for a detailed specification of these algorithms. +// See Compression.md for a detailed specification of these algorithms. 
type CompressionCodec int64 + const ( - CompressionCodec_UNCOMPRESSED CompressionCodec = 0 - CompressionCodec_SNAPPY CompressionCodec = 1 - CompressionCodec_GZIP CompressionCodec = 2 - CompressionCodec_LZO CompressionCodec = 3 - CompressionCodec_BROTLI CompressionCodec = 4 - CompressionCodec_LZ4 CompressionCodec = 5 - CompressionCodec_ZSTD CompressionCodec = 6 - CompressionCodec_LZ4_RAW CompressionCodec = 7 + CompressionCodec_UNCOMPRESSED CompressionCodec = 0 + CompressionCodec_SNAPPY CompressionCodec = 1 + CompressionCodec_GZIP CompressionCodec = 2 + CompressionCodec_LZO CompressionCodec = 3 + CompressionCodec_BROTLI CompressionCodec = 4 + CompressionCodec_LZ4 CompressionCodec = 5 + CompressionCodec_ZSTD CompressionCodec = 6 + CompressionCodec_LZ4_RAW CompressionCodec = 7 ) func (p CompressionCodec) String() string { - switch p { - case CompressionCodec_UNCOMPRESSED: return "UNCOMPRESSED" - case CompressionCodec_SNAPPY: return "SNAPPY" - case CompressionCodec_GZIP: return "GZIP" - case CompressionCodec_LZO: return "LZO" - case CompressionCodec_BROTLI: return "BROTLI" - case CompressionCodec_LZ4: return "LZ4" - case CompressionCodec_ZSTD: return "ZSTD" - case CompressionCodec_LZ4_RAW: return "LZ4_RAW" - } - return "" + switch p { + case CompressionCodec_UNCOMPRESSED: + return "UNCOMPRESSED" + case CompressionCodec_SNAPPY: + return "SNAPPY" + case CompressionCodec_GZIP: + return "GZIP" + case CompressionCodec_LZO: + return "LZO" + case CompressionCodec_BROTLI: + return "BROTLI" + case CompressionCodec_LZ4: + return "LZ4" + case CompressionCodec_ZSTD: + return "ZSTD" + case CompressionCodec_LZ4_RAW: + return "LZ4_RAW" + } + return "" } func CompressionCodecFromString(s string) (CompressionCodec, error) { - switch s { - case "UNCOMPRESSED": return CompressionCodec_UNCOMPRESSED, nil - case "SNAPPY": return CompressionCodec_SNAPPY, nil - case "GZIP": return CompressionCodec_GZIP, nil - case "LZO": return CompressionCodec_LZO, nil - case "BROTLI": return CompressionCodec_BROTLI, nil - case "LZ4": return CompressionCodec_LZ4, nil - case "ZSTD": return CompressionCodec_ZSTD, nil - case "LZ4_RAW": return CompressionCodec_LZ4_RAW, nil - } - return CompressionCodec(0), fmt.Errorf("not a valid CompressionCodec string") + switch s { + case "UNCOMPRESSED": + return CompressionCodec_UNCOMPRESSED, nil + case "SNAPPY": + return CompressionCodec_SNAPPY, nil + case "GZIP": + return CompressionCodec_GZIP, nil + case "LZO": + return CompressionCodec_LZO, nil + case "BROTLI": + return CompressionCodec_BROTLI, nil + case "LZ4": + return CompressionCodec_LZ4, nil + case "ZSTD": + return CompressionCodec_ZSTD, nil + case "LZ4_RAW": + return CompressionCodec_LZ4_RAW, nil + } + return CompressionCodec(0), fmt.Errorf("not a valid CompressionCodec string") } - func CompressionCodecPtr(v CompressionCodec) *CompressionCodec { return &v } func (p CompressionCodec) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *CompressionCodec) UnmarshalText(text []byte) error { -q, err := CompressionCodecFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := CompressionCodecFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *CompressionCodec) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = CompressionCodec(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = 
CompressionCodec(v) + return nil } -func (p * CompressionCodec) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *CompressionCodec) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } + type PageType int64 + const ( - PageType_DATA_PAGE PageType = 0 - PageType_INDEX_PAGE PageType = 1 - PageType_DICTIONARY_PAGE PageType = 2 - PageType_DATA_PAGE_V2 PageType = 3 + PageType_DATA_PAGE PageType = 0 + PageType_INDEX_PAGE PageType = 1 + PageType_DICTIONARY_PAGE PageType = 2 + PageType_DATA_PAGE_V2 PageType = 3 ) func (p PageType) String() string { - switch p { - case PageType_DATA_PAGE: return "DATA_PAGE" - case PageType_INDEX_PAGE: return "INDEX_PAGE" - case PageType_DICTIONARY_PAGE: return "DICTIONARY_PAGE" - case PageType_DATA_PAGE_V2: return "DATA_PAGE_V2" - } - return "" + switch p { + case PageType_DATA_PAGE: + return "DATA_PAGE" + case PageType_INDEX_PAGE: + return "INDEX_PAGE" + case PageType_DICTIONARY_PAGE: + return "DICTIONARY_PAGE" + case PageType_DATA_PAGE_V2: + return "DATA_PAGE_V2" + } + return "" } func PageTypeFromString(s string) (PageType, error) { - switch s { - case "DATA_PAGE": return PageType_DATA_PAGE, nil - case "INDEX_PAGE": return PageType_INDEX_PAGE, nil - case "DICTIONARY_PAGE": return PageType_DICTIONARY_PAGE, nil - case "DATA_PAGE_V2": return PageType_DATA_PAGE_V2, nil - } - return PageType(0), fmt.Errorf("not a valid PageType string") + switch s { + case "DATA_PAGE": + return PageType_DATA_PAGE, nil + case "INDEX_PAGE": + return PageType_INDEX_PAGE, nil + case "DICTIONARY_PAGE": + return PageType_DICTIONARY_PAGE, nil + case "DATA_PAGE_V2": + return PageType_DATA_PAGE_V2, nil + } + return PageType(0), fmt.Errorf("not a valid PageType string") } - func PageTypePtr(v PageType) *PageType { return &v } func (p PageType) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *PageType) UnmarshalText(text []byte) error { -q, err := PageTypeFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := PageTypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *PageType) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = PageType(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = PageType(v) + return nil } -func (p * PageType) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *PageType) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } -//Enum to annotate whether lists of min/max elements inside ColumnIndex -//are ordered and if so, in which direction. + +// Enum to annotate whether lists of min/max elements inside ColumnIndex +// are ordered and if so, in which direction. 
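Nothing in these enum hunks changes semantics; gofmt only re-indents. Every generated thrift enum carries the same helper surface: String, FromString, MarshalText/UnmarshalText, Scan, and Value. A self-contained sketch of the text round-trip, using a trimmed stand-in for the generated Encoding type:

package main

import "fmt"

// Encoding is a trimmed stand-in for the generated thrift enum.
type Encoding int64

const (
	Encoding_PLAIN Encoding = 0
	Encoding_RLE   Encoding = 3
)

func (p Encoding) String() string {
	switch p {
	case Encoding_PLAIN:
		return "PLAIN"
	case Encoding_RLE:
		return "RLE"
	}
	return ""
}

func EncodingFromString(s string) (Encoding, error) {
	switch s {
	case "PLAIN":
		return Encoding_PLAIN, nil
	case "RLE":
		return Encoding_RLE, nil
	}
	return Encoding(0), fmt.Errorf("not a valid Encoding string")
}

// MarshalText / UnmarshalText give the enum a stable JSON/text form.
func (p Encoding) MarshalText() ([]byte, error) { return []byte(p.String()), nil }

func (p *Encoding) UnmarshalText(text []byte) error {
	q, err := EncodingFromString(string(text))
	if err != nil {
		return err
	}
	*p = q
	return nil
}

func main() {
	b, _ := Encoding_RLE.MarshalText()
	var e Encoding
	if err := e.UnmarshalText(b); err == nil {
		fmt.Println(e) // RLE
	}
}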
type BoundaryOrder int64 + const ( - BoundaryOrder_UNORDERED BoundaryOrder = 0 - BoundaryOrder_ASCENDING BoundaryOrder = 1 - BoundaryOrder_DESCENDING BoundaryOrder = 2 + BoundaryOrder_UNORDERED BoundaryOrder = 0 + BoundaryOrder_ASCENDING BoundaryOrder = 1 + BoundaryOrder_DESCENDING BoundaryOrder = 2 ) func (p BoundaryOrder) String() string { - switch p { - case BoundaryOrder_UNORDERED: return "UNORDERED" - case BoundaryOrder_ASCENDING: return "ASCENDING" - case BoundaryOrder_DESCENDING: return "DESCENDING" - } - return "" + switch p { + case BoundaryOrder_UNORDERED: + return "UNORDERED" + case BoundaryOrder_ASCENDING: + return "ASCENDING" + case BoundaryOrder_DESCENDING: + return "DESCENDING" + } + return "" } func BoundaryOrderFromString(s string) (BoundaryOrder, error) { - switch s { - case "UNORDERED": return BoundaryOrder_UNORDERED, nil - case "ASCENDING": return BoundaryOrder_ASCENDING, nil - case "DESCENDING": return BoundaryOrder_DESCENDING, nil - } - return BoundaryOrder(0), fmt.Errorf("not a valid BoundaryOrder string") + switch s { + case "UNORDERED": + return BoundaryOrder_UNORDERED, nil + case "ASCENDING": + return BoundaryOrder_ASCENDING, nil + case "DESCENDING": + return BoundaryOrder_DESCENDING, nil + } + return BoundaryOrder(0), fmt.Errorf("not a valid BoundaryOrder string") } - func BoundaryOrderPtr(v BoundaryOrder) *BoundaryOrder { return &v } func (p BoundaryOrder) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *BoundaryOrder) UnmarshalText(text []byte) error { -q, err := BoundaryOrderFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := BoundaryOrderFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *BoundaryOrder) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = BoundaryOrder(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = BoundaryOrder(v) + return nil } -func (p * BoundaryOrder) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *BoundaryOrder) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } + // Statistics per row group and per page // All fields are optional. -// +// // Attributes: -// - Max: DEPRECATED: min and max value of the column. Use min_value and max_value. -// +// - Max: DEPRECATED: min and max value of the column. Use min_value and max_value. +// // Values are encoded using PLAIN encoding, except that variable-length byte // arrays do not include a length prefix. -// +// // These fields encode min and max values determined by signed comparison // only. New files should use the correct order for a column's logical type // and store the values in the min_value and max_value fields. -// +// // To support older readers, these may be set when the column order is // signed. -// - Min -// - NullCount: count of null value in the column -// - DistinctCount: count of distinct values occurring -// - MaxValue: Min and max values for the column, determined by its ColumnOrder. -// +// - Min +// - NullCount: count of null value in the column +// - DistinctCount: count of distinct values occurring +// - MaxValue: Min and max values for the column, determined by its ColumnOrder. +// // Values are encoded using PLAIN encoding, except that variable-length byte // arrays do not include a length prefix. 
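The pointer-receiver Scan and Value methods generated for each enum are what let these types plug into database/sql: *T satisfies sql.Scanner for reads and driver.Valuer for writes, storing the enum as an int64 column. A compile-checked sketch using a stand-in PageType:

package main

import (
	"database/sql"
	"database/sql/driver"
	"errors"
	"fmt"
)

// PageType is a stand-in for the generated enum.
type PageType int64

// Scan implements sql.Scanner, decoding an int64 column value.
func (p *PageType) Scan(value interface{}) error {
	v, ok := value.(int64)
	if !ok {
		return errors.New("Scan value is not int64")
	}
	*p = PageType(v)
	return nil
}

// Value implements driver.Valuer, encoding the enum as int64.
func (p *PageType) Value() (driver.Value, error) {
	if p == nil {
		return nil, nil
	}
	return int64(*p), nil
}

// Compile-time checks that both interfaces are satisfied.
var (
	_ sql.Scanner   = (*PageType)(nil)
	_ driver.Valuer = (*PageType)(nil)
)

func main() {
	var pt PageType
	_ = pt.Scan(int64(2))
	fmt.Println(int64(pt)) // 2, i.e. DICTIONARY_PAGE in the generated code
}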
-// - MinValue +// - MinValue type Statistics struct { - Max []byte `thrift:"max,1" db:"max" json:"max,omitempty"` - Min []byte `thrift:"min,2" db:"min" json:"min,omitempty"` - NullCount *int64 `thrift:"null_count,3" db:"null_count" json:"null_count,omitempty"` - DistinctCount *int64 `thrift:"distinct_count,4" db:"distinct_count" json:"distinct_count,omitempty"` - MaxValue []byte `thrift:"max_value,5" db:"max_value" json:"max_value,omitempty"` - MinValue []byte `thrift:"min_value,6" db:"min_value" json:"min_value,omitempty"` + Max []byte `thrift:"max,1" db:"max" json:"max,omitempty"` + Min []byte `thrift:"min,2" db:"min" json:"min,omitempty"` + NullCount *int64 `thrift:"null_count,3" db:"null_count" json:"null_count,omitempty"` + DistinctCount *int64 `thrift:"distinct_count,4" db:"distinct_count" json:"distinct_count,omitempty"` + MaxValue []byte `thrift:"max_value,5" db:"max_value" json:"max_value,omitempty"` + MinValue []byte `thrift:"min_value,6" db:"min_value" json:"min_value,omitempty"` } func NewStatistics() *Statistics { - return &Statistics{} + return &Statistics{} } var Statistics_Max_DEFAULT []byte func (p *Statistics) GetMax() []byte { - return p.Max + return p.Max } + var Statistics_Min_DEFAULT []byte func (p *Statistics) GetMin() []byte { - return p.Min + return p.Min } + var Statistics_NullCount_DEFAULT int64 + func (p *Statistics) GetNullCount() int64 { - if !p.IsSetNullCount() { - return Statistics_NullCount_DEFAULT - } -return *p.NullCount + if !p.IsSetNullCount() { + return Statistics_NullCount_DEFAULT + } + return *p.NullCount } + var Statistics_DistinctCount_DEFAULT int64 + func (p *Statistics) GetDistinctCount() int64 { - if !p.IsSetDistinctCount() { - return Statistics_DistinctCount_DEFAULT - } -return *p.DistinctCount + if !p.IsSetDistinctCount() { + return Statistics_DistinctCount_DEFAULT + } + return *p.DistinctCount } + var Statistics_MaxValue_DEFAULT []byte func (p *Statistics) GetMaxValue() []byte { - return p.MaxValue + return p.MaxValue } + var Statistics_MinValue_DEFAULT []byte func (p *Statistics) GetMinValue() []byte { - return p.MinValue + return p.MinValue } func (p *Statistics) IsSetMax() bool { - return p.Max != nil + return p.Max != nil } func (p *Statistics) IsSetMin() bool { - return p.Min != nil + return p.Min != nil } func (p *Statistics) IsSetNullCount() bool { - return p.NullCount != nil + return p.NullCount != nil } func (p *Statistics) IsSetDistinctCount() bool { - return p.DistinctCount != nil + return p.DistinctCount != nil } func (p *Statistics) IsSetMaxValue() bool { - return p.MaxValue != nil + return p.MaxValue != nil } func (p *Statistics) IsSetMinValue() bool { - return p.MinValue != nil + return p.MinValue != nil } func (p *Statistics) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRING { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRING { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - 
return err - } - } - case 3: - if fieldTypeId == thrift.I64 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I64 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.STRING { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.STRING { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *Statistics) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.Max = v -} - return nil -} - -func (p *Statistics) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.Min = v -} - return nil -} - -func (p *Statistics) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.NullCount = &v -} - return nil -} - -func (p *Statistics) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - p.DistinctCount = &v -} - return nil -} - -func (p *Statistics) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 5: ", err) -} else { - p.MaxValue = v -} - return nil -} - -func (p *Statistics) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - p.MinValue = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRING { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRING { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I64 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return 
err + } + } + case 4: + if fieldTypeId == thrift.I64 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.STRING { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.STRING { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *Statistics) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Max = v + } + return nil +} + +func (p *Statistics) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Min = v + } + return nil +} + +func (p *Statistics) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NullCount = &v + } + return nil +} + +func (p *Statistics) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + p.DistinctCount = &v + } + return nil +} + +func (p *Statistics) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.MaxValue = v + } + return nil +} + +func (p *Statistics) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.MinValue = v + } + return nil } func (p *Statistics) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "Statistics"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "Statistics"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := 
p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *Statistics) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMax() { - if err := oprot.WriteFieldBegin(ctx, "max", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:max: ", p), err) } - if err := oprot.WriteBinary(ctx, p.Max); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.max (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:max: ", p), err) } - } - return err + if p.IsSetMax() { + if err := oprot.WriteFieldBegin(ctx, "max", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:max: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.Max); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.max (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:max: ", p), err) + } + } + return err } func (p *Statistics) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMin() { - if err := oprot.WriteFieldBegin(ctx, "min", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min: ", p), err) } - if err := oprot.WriteBinary(ctx, p.Min); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.min (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min: ", p), err) } - } - return err + if p.IsSetMin() { + if err := oprot.WriteFieldBegin(ctx, "min", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.Min); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.min (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min: ", p), err) + } + } + return err } func (p *Statistics) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetNullCount() { - if err := oprot.WriteFieldBegin(ctx, "null_count", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:null_count: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.NullCount)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.null_count (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:null_count: ", p), err) } - } - return err + if p.IsSetNullCount() { + if err := oprot.WriteFieldBegin(ctx, "null_count", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:null_count: ", p), 
err) + } + if err := oprot.WriteI64(ctx, int64(*p.NullCount)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.null_count (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:null_count: ", p), err) + } + } + return err } func (p *Statistics) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDistinctCount() { - if err := oprot.WriteFieldBegin(ctx, "distinct_count", thrift.I64, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:distinct_count: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.DistinctCount)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.distinct_count (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:distinct_count: ", p), err) } - } - return err + if p.IsSetDistinctCount() { + if err := oprot.WriteFieldBegin(ctx, "distinct_count", thrift.I64, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:distinct_count: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.DistinctCount)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.distinct_count (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:distinct_count: ", p), err) + } + } + return err } func (p *Statistics) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMaxValue() { - if err := oprot.WriteFieldBegin(ctx, "max_value", thrift.STRING, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:max_value: ", p), err) } - if err := oprot.WriteBinary(ctx, p.MaxValue); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.max_value (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:max_value: ", p), err) } - } - return err + if p.IsSetMaxValue() { + if err := oprot.WriteFieldBegin(ctx, "max_value", thrift.STRING, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:max_value: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.MaxValue); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.max_value (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:max_value: ", p), err) + } + } + return err } func (p *Statistics) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMinValue() { - if err := oprot.WriteFieldBegin(ctx, "min_value", thrift.STRING, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:min_value: ", p), err) } - if err := oprot.WriteBinary(ctx, p.MinValue); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.min_value (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:min_value: ", p), err) } - } - return err + if p.IsSetMinValue() { + if err := oprot.WriteFieldBegin(ctx, "min_value", thrift.STRING, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:min_value: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.MinValue); err 
!= nil { + return thrift.PrependError(fmt.Sprintf("%T.min_value (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:min_value: ", p), err) + } + } + return err } func (p *Statistics) Equals(other *Statistics) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if bytes.Compare(p.Max, other.Max) != 0 { return false } - if bytes.Compare(p.Min, other.Min) != 0 { return false } - if p.NullCount != other.NullCount { - if p.NullCount == nil || other.NullCount == nil { - return false - } - if (*p.NullCount) != (*other.NullCount) { return false } - } - if p.DistinctCount != other.DistinctCount { - if p.DistinctCount == nil || other.DistinctCount == nil { - return false - } - if (*p.DistinctCount) != (*other.DistinctCount) { return false } - } - if bytes.Compare(p.MaxValue, other.MaxValue) != 0 { return false } - if bytes.Compare(p.MinValue, other.MinValue) != 0 { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if bytes.Compare(p.Max, other.Max) != 0 { + return false + } + if bytes.Compare(p.Min, other.Min) != 0 { + return false + } + if p.NullCount != other.NullCount { + if p.NullCount == nil || other.NullCount == nil { + return false + } + if (*p.NullCount) != (*other.NullCount) { + return false + } + } + if p.DistinctCount != other.DistinctCount { + if p.DistinctCount == nil || other.DistinctCount == nil { + return false + } + if (*p.DistinctCount) != (*other.DistinctCount) { + return false + } + } + if bytes.Compare(p.MaxValue, other.MaxValue) != 0 { + return false + } + if bytes.Compare(p.MinValue, other.MinValue) != 0 { + return false + } + return true } func (p *Statistics) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("Statistics(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("Statistics(%+v)", *p) } func (p *Statistics) Validate() error { - return nil + return nil } + // Empty structs to use as logical type annotations type StringType struct { } func NewStringType() *StringType { - return &StringType{} + return &StringType{} } func (p *StringType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read 
struct end error: ", p), err) + } + return nil } func (p *StringType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "StringType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "StringType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *StringType) Equals(other *StringType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *StringType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("StringType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("StringType(%+v)", *p) } func (p *StringType) Validate() error { - return nil + return nil } + type UUIDType struct { } func NewUUIDType() *UUIDType { - return &UUIDType{} + return &UUIDType{} } func (p *UUIDType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *UUIDType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "UUIDType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "UUIDType"); err != nil { + return 
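
// Illustrative sketch, not part of the generated file: round-trip a
// Statistics value through a Thrift protocol. It assumes it compiles in the
// same package as the generated types above, using the context/fmt/thrift
// imports this file already has; roundTripStatistics is a hypothetical name.
// Note the optional fields are pointers or nil-able []byte, so nil means
// "unset" and the writeFieldN helpers omit unset fields from the wire
// entirely; min_value/max_value (fields 5/6) are the order-aware statistics
// that supersede the legacy min/max (fields 1/2) in the Parquet format.
func roundTripStatistics() error {
	ctx := context.Background()

	nulls := int64(3)
	stats := NewStatistics()
	stats.NullCount = &nulls
	stats.MinValue = []byte("apple")
	stats.MaxValue = []byte("pear")

	// A fresh in-memory transport; nil *TConfiguration means defaults.
	buf := thrift.NewTMemoryBuffer()
	out := thrift.NewTCompactProtocolConf(buf, nil)
	if err := stats.Write(ctx, out); err != nil {
		return err
	}
	if err := out.Flush(ctx); err != nil {
		return err
	}

	// Decode from the same buffer with a separate reader protocol.
	in := thrift.NewTCompactProtocolConf(buf, nil)
	decoded := NewStatistics()
	if err := decoded.Read(ctx, in); err != nil {
		return err
	}
	if !decoded.Equals(stats) {
		return fmt.Errorf("round trip mismatch: %v != %v", decoded, stats)
	}
	return nil
}
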
thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *UUIDType) Equals(other *UUIDType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *UUIDType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("UUIDType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("UUIDType(%+v)", *p) } func (p *UUIDType) Validate() error { - return nil + return nil } + type MapType struct { } func NewMapType() *MapType { - return &MapType{} + return &MapType{} } func (p *MapType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *MapType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "MapType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "MapType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *MapType) Equals(other *MapType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *MapType) String() string { - if p == nil { 
- return "" - } - return fmt.Sprintf("MapType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("MapType(%+v)", *p) } func (p *MapType) Validate() error { - return nil + return nil } + type ListType struct { } func NewListType() *ListType { - return &ListType{} + return &ListType{} } func (p *ListType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *ListType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "ListType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "ListType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *ListType) Equals(other *ListType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *ListType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ListType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("ListType(%+v)", *p) } func (p *ListType) Validate() error { - return nil + return nil } + type EnumType struct { } func NewEnumType() *EnumType { - return &EnumType{} + return &EnumType{} } func (p *EnumType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d 
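
// Sketch of why every generated Read loop funnels into iprot.Skip: unknown
// field IDs from a newer writer are discarded instead of failing the read,
// which is what keeps these structs forward-compatible. The empty annotation
// structs above skip every field they see; structs with known fields do the
// same for unrecognized IDs via the default case. Hypothetical helper name,
// same-package assumption as the earlier sketch; Write* errors are ignored
// only to keep this short.
func skipsUnknownFields() error {
	ctx := context.Background()
	buf := thrift.NewTMemoryBuffer()
	prot := thrift.NewTCompactProtocolConf(buf, nil)

	// Hand-encode a ListType carrying a field ID 99 that this reader
	// knows nothing about, as a future format revision might.
	prot.WriteStructBegin(ctx, "ListType")
	prot.WriteFieldBegin(ctx, "field_from_the_future", thrift.I32, 99)
	prot.WriteI32(ctx, 42)
	prot.WriteFieldEnd(ctx)
	prot.WriteFieldStop(ctx)
	prot.WriteStructEnd(ctx)
	prot.Flush(ctx)

	// Read succeeds: field 99 falls through to iprot.Skip and is dropped.
	return NewListType().Read(ctx, prot)
}
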
read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *EnumType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "EnumType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "EnumType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *EnumType) Equals(other *EnumType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *EnumType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("EnumType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("EnumType(%+v)", *p) } func (p *EnumType) Validate() error { - return nil + return nil } + type DateType struct { } func NewDateType() *DateType { - return &DateType{} + return &DateType{} } func (p *DateType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read 
error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *DateType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "DateType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "DateType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *DateType) Equals(other *DateType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *DateType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DateType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("DateType(%+v)", *p) } func (p *DateType) Validate() error { - return nil + return nil } + type Float16Type struct { } func NewFloat16Type() *Float16Type { - return &Float16Type{} + return &Float16Type{} } func (p *Float16Type) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *Float16Type) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "Float16Type"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if 
err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "Float16Type"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *Float16Type) Equals(other *Float16Type) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *Float16Type) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("Float16Type(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("Float16Type(%+v)", *p) } func (p *Float16Type) Validate() error { - return nil + return nil } + // Logical type to annotate a column that is always null. -// +// // Sometimes when discovering the schema of existing data, values are always // null and the physical type can't be determined. This annotation signals // the case where the physical type was guessed from all null values. @@ -1358,8169 +1570,9269 @@ type NullType struct { } func NewNullType() *NullType { - return &NullType{} + return &NullType{} } func (p *NullType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *NullType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "NullType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "NullType"); err != nil { + 
return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *NullType) Equals(other *NullType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *NullType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("NullType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("NullType(%+v)", *p) } func (p *NullType) Validate() error { - return nil + return nil } + // Decimal logical type annotation -// +// // To maintain forward-compatibility in v1, implementations using this logical // type must also set scale and precision on the annotated SchemaElement. -// +// // Allowed for physical types: INT32, INT64, FIXED, and BINARY -// +// // Attributes: -// - Scale -// - Precision +// - Scale +// - Precision type DecimalType struct { - Scale int32 `thrift:"scale,1,required" db:"scale" json:"scale"` - Precision int32 `thrift:"precision,2,required" db:"precision" json:"precision"` + Scale int32 `thrift:"scale,1,required" db:"scale" json:"scale"` + Precision int32 `thrift:"precision,2,required" db:"precision" json:"precision"` } func NewDecimalType() *DecimalType { - return &DecimalType{} + return &DecimalType{} } - func (p *DecimalType) GetScale() int32 { - return p.Scale + return p.Scale } func (p *DecimalType) GetPrecision() int32 { - return p.Precision + return p.Precision } func (p *DecimalType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetScale bool = false; - var issetPrecision bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetScale = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetPrecision = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetScale{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Scale is not set")); - } - if !issetPrecision{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Precision is not set")); - } - return nil -} - -func (p *DecimalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { 
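
// Tiny sketch (hypothetical, same-package assumption as above): the
// payload-free annotation structs compare equal whenever both sides are
// non-nil, since there are no fields to differ; pointer identity also
// short-circuits.
func nullTypeEquality() {
	a, b := NewNullType(), NewNullType()
	_ = a.Equals(b)   // true: nothing to compare
	_ = a.Equals(nil) // false: nil vs non-nil
	_ = a.Equals(a)   // true: p == other short-circuit
}
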
- p.Scale = v -} - return nil -} - -func (p *DecimalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.Precision = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetScale bool = false + var issetPrecision bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetScale = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetPrecision = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetScale { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Scale is not set")) + } + if !issetPrecision { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Precision is not set")) + } + return nil +} + +func (p *DecimalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Scale = v + } + return nil +} + +func (p *DecimalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Precision = v + } + return nil } func (p *DecimalType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "DecimalType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "DecimalType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *DecimalType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := 
oprot.WriteFieldBegin(ctx, "scale", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:scale: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Scale)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.scale (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:scale: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "scale", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:scale: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Scale)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.scale (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:scale: ", p), err) + } + return err } func (p *DecimalType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "precision", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:precision: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Precision)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.precision (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:precision: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "precision", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:precision: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Precision)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.precision (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:precision: ", p), err) + } + return err } func (p *DecimalType) Equals(other *DecimalType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Scale != other.Scale { return false } - if p.Precision != other.Precision { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Scale != other.Scale { + return false + } + if p.Precision != other.Precision { + return false + } + return true } func (p *DecimalType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DecimalType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("DecimalType(%+v)", *p) } func (p *DecimalType) Validate() error { - return nil + return nil } + // Time units for logical types type MilliSeconds struct { } func NewMilliSeconds() *MilliSeconds { - return &MilliSeconds{} + return &MilliSeconds{} } func (p *MilliSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return 
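
// Sketch of the required-field machinery above: DecimalType's Read tracks
// issetScale/issetPrecision and rejects a struct missing either one. The
// hypothetical helper below hand-encodes only scale, so Read returns the
// "Required field Precision is not set" protocol exception. (For context,
// scale=2 with precision=9 describes values such as 1234567.89, i.e. the
// unscaled integer divided by 10^scale.)
func decimalMissingPrecision() error {
	ctx := context.Background()
	buf := thrift.NewTMemoryBuffer()
	prot := thrift.NewTCompactProtocolConf(buf, nil)

	prot.WriteStructBegin(ctx, "DecimalType")
	prot.WriteFieldBegin(ctx, "scale", thrift.I32, 1)
	prot.WriteI32(ctx, 2)
	prot.WriteFieldEnd(ctx)
	prot.WriteFieldStop(ctx) // precision (field 2, also required) never written
	prot.WriteStructEnd(ctx)
	prot.Flush(ctx)

	return NewDecimalType().Read(ctx, prot) // non-nil: precision unset
}
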
thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *MilliSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "MilliSeconds"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "MilliSeconds"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *MilliSeconds) Equals(other *MilliSeconds) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *MilliSeconds) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("MilliSeconds(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("MilliSeconds(%+v)", *p) } func (p *MilliSeconds) Validate() error { - return nil + return nil } + type MicroSeconds struct { } func NewMicroSeconds() *MicroSeconds { - return &MicroSeconds{} + return &MicroSeconds{} } func (p *MicroSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if 
err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *MicroSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "MicroSeconds"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "MicroSeconds"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *MicroSeconds) Equals(other *MicroSeconds) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *MicroSeconds) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("MicroSeconds(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("MicroSeconds(%+v)", *p) } func (p *MicroSeconds) Validate() error { - return nil + return nil } + type NanoSeconds struct { } func NewNanoSeconds() *NanoSeconds { - return &NanoSeconds{} + return &NanoSeconds{} } func (p *NanoSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *NanoSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "NanoSeconds"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return 
thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "NanoSeconds"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *NanoSeconds) Equals(other *NanoSeconds) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *NanoSeconds) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("NanoSeconds(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("NanoSeconds(%+v)", *p) } func (p *NanoSeconds) Validate() error { - return nil + return nil } + // Attributes: -// - MILLIS -// - MICROS -// - NANOS +// - MILLIS +// - MICROS +// - NANOS type TimeUnit struct { - MILLIS *MilliSeconds `thrift:"MILLIS,1" db:"MILLIS" json:"MILLIS,omitempty"` - MICROS *MicroSeconds `thrift:"MICROS,2" db:"MICROS" json:"MICROS,omitempty"` - NANOS *NanoSeconds `thrift:"NANOS,3" db:"NANOS" json:"NANOS,omitempty"` + MILLIS *MilliSeconds `thrift:"MILLIS,1" db:"MILLIS" json:"MILLIS,omitempty"` + MICROS *MicroSeconds `thrift:"MICROS,2" db:"MICROS" json:"MICROS,omitempty"` + NANOS *NanoSeconds `thrift:"NANOS,3" db:"NANOS" json:"NANOS,omitempty"` } func NewTimeUnit() *TimeUnit { - return &TimeUnit{} + return &TimeUnit{} } var TimeUnit_MILLIS_DEFAULT *MilliSeconds + func (p *TimeUnit) GetMILLIS() *MilliSeconds { - if !p.IsSetMILLIS() { - return TimeUnit_MILLIS_DEFAULT - } -return p.MILLIS + if !p.IsSetMILLIS() { + return TimeUnit_MILLIS_DEFAULT + } + return p.MILLIS } + var TimeUnit_MICROS_DEFAULT *MicroSeconds + func (p *TimeUnit) GetMICROS() *MicroSeconds { - if !p.IsSetMICROS() { - return TimeUnit_MICROS_DEFAULT - } -return p.MICROS + if !p.IsSetMICROS() { + return TimeUnit_MICROS_DEFAULT + } + return p.MICROS } + var TimeUnit_NANOS_DEFAULT *NanoSeconds + func (p *TimeUnit) GetNANOS() *NanoSeconds { - if !p.IsSetNANOS() { - return TimeUnit_NANOS_DEFAULT - } -return p.NANOS + if !p.IsSetNANOS() { + return TimeUnit_NANOS_DEFAULT + } + return p.NANOS } func (p *TimeUnit) CountSetFieldsTimeUnit() int { - count := 0 - if (p.IsSetMILLIS()) { - count++ - } - if (p.IsSetMICROS()) { - count++ - } - if (p.IsSetNANOS()) { - count++ - } - return count + count := 0 + if p.IsSetMILLIS() { + count++ + } + if p.IsSetMICROS() { + count++ + } + if p.IsSetNANOS() { + count++ + } + return count } func (p *TimeUnit) IsSetMILLIS() bool { - return p.MILLIS != nil + return p.MILLIS != nil } func (p *TimeUnit) IsSetMICROS() bool { - return p.MICROS != nil + return p.MICROS != nil } func (p *TimeUnit) IsSetNANOS() bool { - return p.NANOS != nil + return p.NANOS != nil } func (p *TimeUnit) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := 
p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *TimeUnit) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.MILLIS = &MilliSeconds{} - if err := p.MILLIS.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MILLIS), err) - } - return nil -} - -func (p *TimeUnit) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.MICROS = &MicroSeconds{} - if err := p.MICROS.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MICROS), err) - } - return nil -} - -func (p *TimeUnit) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - p.NANOS = &NanoSeconds{} - if err := p.NANOS.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.NANOS), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *TimeUnit) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.MILLIS = &MilliSeconds{} + if err := p.MILLIS.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MILLIS), err) + } + return nil +} + +func (p *TimeUnit) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.MICROS = &MicroSeconds{} + if err := p.MICROS.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MICROS), err) + } + return nil +} + +func (p *TimeUnit) ReadField3(ctx context.Context, 
iprot thrift.TProtocol) error { + p.NANOS = &NanoSeconds{} + if err := p.NANOS.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.NANOS), err) + } + return nil } func (p *TimeUnit) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsTimeUnit(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "TimeUnit"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsTimeUnit(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "TimeUnit"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *TimeUnit) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMILLIS() { - if err := oprot.WriteFieldBegin(ctx, "MILLIS", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:MILLIS: ", p), err) } - if err := p.MILLIS.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MILLIS), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:MILLIS: ", p), err) } - } - return err + if p.IsSetMILLIS() { + if err := oprot.WriteFieldBegin(ctx, "MILLIS", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:MILLIS: ", p), err) + } + if err := p.MILLIS.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MILLIS), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:MILLIS: ", p), err) + } + } + return err } func (p *TimeUnit) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMICROS() { - if err := oprot.WriteFieldBegin(ctx, "MICROS", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MICROS: ", p), err) } - if err := p.MICROS.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MICROS), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MICROS: ", p), err) } - } - return err + if p.IsSetMICROS() { + if err := oprot.WriteFieldBegin(ctx, 
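
// Sketch of the union contract enforced above: TimeUnit is a Thrift union,
// so Write refuses anything but exactly one set member
// (CountSetFieldsTimeUnit() == 1). Hypothetical helper name, same-package
// assumption as the earlier sketches.
func timeUnitIsAUnion() {
	ctx := context.Background()
	buf := thrift.NewTMemoryBuffer()
	prot := thrift.NewTCompactProtocolConf(buf, nil)

	empty := NewTimeUnit()
	_ = empty.Write(ctx, prot) // error: "exactly one field must be set (0 set)"

	micros := &TimeUnit{MICROS: NewMicroSeconds()}
	_ = micros.Write(ctx, prot) // ok: exactly one member set
}
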
"MICROS", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MICROS: ", p), err) + } + if err := p.MICROS.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MICROS), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MICROS: ", p), err) + } + } + return err } func (p *TimeUnit) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetNANOS() { - if err := oprot.WriteFieldBegin(ctx, "NANOS", thrift.STRUCT, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:NANOS: ", p), err) } - if err := p.NANOS.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.NANOS), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:NANOS: ", p), err) } - } - return err + if p.IsSetNANOS() { + if err := oprot.WriteFieldBegin(ctx, "NANOS", thrift.STRUCT, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:NANOS: ", p), err) + } + if err := p.NANOS.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.NANOS), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:NANOS: ", p), err) + } + } + return err } func (p *TimeUnit) Equals(other *TimeUnit) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.MILLIS.Equals(other.MILLIS) { return false } - if !p.MICROS.Equals(other.MICROS) { return false } - if !p.NANOS.Equals(other.NANOS) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.MILLIS.Equals(other.MILLIS) { + return false + } + if !p.MICROS.Equals(other.MICROS) { + return false + } + if !p.NANOS.Equals(other.NANOS) { + return false + } + return true } func (p *TimeUnit) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TimeUnit(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("TimeUnit(%+v)", *p) } func (p *TimeUnit) Validate() error { - return nil + return nil } + // Timestamp logical type annotation -// +// // Allowed for physical types: INT64 -// +// // Attributes: -// - IsAdjustedToUTC -// - Unit +// - IsAdjustedToUTC +// - Unit type TimestampType struct { - IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` - Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` + IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` + Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` } func NewTimestampType() *TimestampType { - return &TimestampType{} + return &TimestampType{} } - func (p *TimestampType) GetIsAdjustedToUTC() bool { - return p.IsAdjustedToUTC + return p.IsAdjustedToUTC } + var TimestampType_Unit_DEFAULT *TimeUnit + func (p *TimestampType) GetUnit() *TimeUnit { - if !p.IsSetUnit() { - return TimestampType_Unit_DEFAULT - } -return p.Unit + if !p.IsSetUnit() { + return TimestampType_Unit_DEFAULT + } + return p.Unit } func (p *TimestampType) IsSetUnit() bool { - return p.Unit != nil + return p.Unit != nil } func (p *TimestampType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := 
iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetIsAdjustedToUTC bool = false; - var issetUnit bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetIsAdjustedToUTC = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetUnit = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetIsAdjustedToUTC{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")); - } - if !issetUnit{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")); - } - return nil -} - -func (p *TimestampType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.IsAdjustedToUTC = v -} - return nil -} - -func (p *TimestampType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.Unit = &TimeUnit{} - if err := p.Unit.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetIsAdjustedToUTC bool = false + var issetUnit bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetIsAdjustedToUTC = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetUnit = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetIsAdjustedToUTC { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")) + } + if !issetUnit { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")) + } + return nil +} + +func (p *TimestampType) ReadField1(ctx context.Context, 
iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.IsAdjustedToUTC = v + } + return nil +} + +func (p *TimestampType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.Unit = &TimeUnit{} + if err := p.Unit.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) + } + return nil } func (p *TimestampType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "TimestampType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "TimestampType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *TimestampType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "isAdjustedToUTC", thrift.BOOL, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.IsAdjustedToUTC)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "isAdjustedToUTC", thrift.BOOL, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(p.IsAdjustedToUTC)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) + } + return err } func (p *TimestampType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "unit", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) } - if err := p.Unit.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "unit", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) + } + if 
err := p.Unit.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) + } + return err } func (p *TimestampType) Equals(other *TimestampType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.IsAdjustedToUTC != other.IsAdjustedToUTC { return false } - if !p.Unit.Equals(other.Unit) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.IsAdjustedToUTC != other.IsAdjustedToUTC { + return false + } + if !p.Unit.Equals(other.Unit) { + return false + } + return true } func (p *TimestampType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TimestampType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("TimestampType(%+v)", *p) } func (p *TimestampType) Validate() error { - return nil + return nil } + // Time logical type annotation -// +// // Allowed for physical types: INT32 (millis), INT64 (micros, nanos) -// +// // Attributes: -// - IsAdjustedToUTC -// - Unit +// - IsAdjustedToUTC +// - Unit type TimeType struct { - IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` - Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` + IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` + Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` } func NewTimeType() *TimeType { - return &TimeType{} + return &TimeType{} } - func (p *TimeType) GetIsAdjustedToUTC() bool { - return p.IsAdjustedToUTC + return p.IsAdjustedToUTC } + var TimeType_Unit_DEFAULT *TimeUnit + func (p *TimeType) GetUnit() *TimeUnit { - if !p.IsSetUnit() { - return TimeType_Unit_DEFAULT - } -return p.Unit + if !p.IsSetUnit() { + return TimeType_Unit_DEFAULT + } + return p.Unit } func (p *TimeType) IsSetUnit() bool { - return p.Unit != nil + return p.Unit != nil } func (p *TimeType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetIsAdjustedToUTC bool = false; - var issetUnit bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetIsAdjustedToUTC = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetUnit = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetIsAdjustedToUTC{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")); - } - if 
!issetUnit{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")); - } - return nil -} - -func (p *TimeType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.IsAdjustedToUTC = v -} - return nil -} - -func (p *TimeType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.Unit = &TimeUnit{} - if err := p.Unit.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetIsAdjustedToUTC bool = false + var issetUnit bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetIsAdjustedToUTC = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetUnit = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetIsAdjustedToUTC { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")) + } + if !issetUnit { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")) + } + return nil +} + +func (p *TimeType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.IsAdjustedToUTC = v + } + return nil +} + +func (p *TimeType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.Unit = &TimeUnit{} + if err := p.Unit.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) + } + return nil } func (p *TimeType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "TimeType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "TimeType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := 
p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *TimeType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "isAdjustedToUTC", thrift.BOOL, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.IsAdjustedToUTC)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "isAdjustedToUTC", thrift.BOOL, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(p.IsAdjustedToUTC)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) + } + return err } func (p *TimeType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "unit", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) } - if err := p.Unit.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "unit", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) + } + if err := p.Unit.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) + } + return err } func (p *TimeType) Equals(other *TimeType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.IsAdjustedToUTC != other.IsAdjustedToUTC { return false } - if !p.Unit.Equals(other.Unit) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.IsAdjustedToUTC != other.IsAdjustedToUTC { + return false + } + if !p.Unit.Equals(other.Unit) { + return false + } + return true } func (p *TimeType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TimeType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("TimeType(%+v)", *p) } func (p *TimeType) Validate() error { - return nil + return nil } + // Integer logical type annotation -// +// // bitWidth must be 8, 16, 32, or 64. 
-// 
+//
 // Allowed for physical types: INT32, INT64
-// 
+//
 // Attributes:
-//  - BitWidth
-//  - IsSigned
+//   - BitWidth
+//   - IsSigned
 type IntType struct {
-  BitWidth int8 `thrift:"bitWidth,1,required" db:"bitWidth" json:"bitWidth"`
-  IsSigned bool `thrift:"isSigned,2,required" db:"isSigned" json:"isSigned"`
+	BitWidth int8 `thrift:"bitWidth,1,required" db:"bitWidth" json:"bitWidth"`
+	IsSigned bool `thrift:"isSigned,2,required" db:"isSigned" json:"isSigned"`
 }

 func NewIntType() *IntType {
-  return &IntType{}
+	return &IntType{}
 }
-

 func (p *IntType) GetBitWidth() int8 {
-  return p.BitWidth
+	return p.BitWidth
 }

 func (p *IntType) GetIsSigned() bool {
-  return p.IsSigned
+	return p.IsSigned
 }

 func (p *IntType) Read(ctx context.Context, iprot thrift.TProtocol) error {
-  if _, err := iprot.ReadStructBegin(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
-  }
-
-  var issetBitWidth bool = false;
-  var issetIsSigned bool = false;
-
-  for {
-    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
-    if err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
-    }
-    if fieldTypeId == thrift.STOP { break; }
-    switch fieldId {
-    case 1:
-      if fieldTypeId == thrift.BYTE {
-        if err := p.ReadField1(ctx, iprot); err != nil {
-          return err
-        }
-        issetBitWidth = true
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 2:
-      if fieldTypeId == thrift.BOOL {
-        if err := p.ReadField2(ctx, iprot); err != nil {
-          return err
-        }
-        issetIsSigned = true
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    default:
-      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-        return err
-      }
-    }
-    if err := iprot.ReadFieldEnd(ctx); err != nil {
-      return err
-    }
-  }
-  if err := iprot.ReadStructEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
-  }
-  if !issetBitWidth{
-    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BitWidth is not set"));
-  }
-  if !issetIsSigned{
-    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsSigned is not set"));
-  }
-  return nil
-}
-
-func (p *IntType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadByte(ctx); err != nil {
-    return thrift.PrependError("error reading field 1: ", err)
-} else {
-  temp := int8(v)
-  p.BitWidth = temp
-}
-  return nil
-}
-
-func (p *IntType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadBool(ctx); err != nil {
-    return thrift.PrependError("error reading field 2: ", err)
-} else {
-  p.IsSigned = v
-}
-  return nil
+	if _, err := iprot.ReadStructBegin(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+	}
+
+	var issetBitWidth bool = false
+	var issetIsSigned bool = false
+
+	for {
+		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+		if err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+		}
+		if fieldTypeId == thrift.STOP {
+			break
+		}
+		switch fieldId {
+		case 1:
+			if fieldTypeId == thrift.BYTE {
+				if err := p.ReadField1(ctx, iprot); err != nil {
+					return err
+				}
+				issetBitWidth = true
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 2:
+			if fieldTypeId == thrift.BOOL {
+				if err := p.ReadField2(ctx, iprot); err != nil {
+					return err
+				}
+				issetIsSigned = true
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		default:
+			if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+				return err
+			}
+		}
+		if err := iprot.ReadFieldEnd(ctx); err != nil {
+			return err
+		}
+	}
+	if err := iprot.ReadStructEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+	}
+	if !issetBitWidth {
+		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BitWidth is not set"))
+	}
+	if !issetIsSigned {
+		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsSigned is not set"))
+	}
+	return nil
+}
+
+func (p *IntType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadByte(ctx); err != nil {
+		return thrift.PrependError("error reading field 1: ", err)
+	} else {
+		temp := int8(v)
+		p.BitWidth = temp
+	}
+	return nil
+}
+
+func (p *IntType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadBool(ctx); err != nil {
+		return thrift.PrependError("error reading field 2: ", err)
+	} else {
+		p.IsSigned = v
+	}
+	return nil
 }

 func (p *IntType) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if err := oprot.WriteStructBegin(ctx, "IntType"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-    if err := p.writeField1(ctx, oprot); err != nil { return err }
-    if err := p.writeField2(ctx, oprot); err != nil { return err }
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if err := oprot.WriteStructBegin(ctx, "IntType"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+		if err := p.writeField1(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField2(ctx, oprot); err != nil {
+			return err
+		}
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }

 func (p *IntType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if err := oprot.WriteFieldBegin(ctx, "bitWidth", thrift.BYTE, 1); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:bitWidth: ", p), err) }
-  if err := oprot.WriteByte(ctx, int8(p.BitWidth)); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T.bitWidth (1) field write error: ", p), err) }
-  if err := oprot.WriteFieldEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:bitWidth: ", p), err) }
-  return err
+	if err := oprot.WriteFieldBegin(ctx, "bitWidth", thrift.BYTE, 1); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:bitWidth: ", p), err)
+	}
+	if err := oprot.WriteByte(ctx, int8(p.BitWidth)); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T.bitWidth (1) field write error: ", p), err)
+	}
+	if err := oprot.WriteFieldEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field end error 1:bitWidth: ", p), err)
+	}
+	return err
 }

 func (p *IntType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if err := oprot.WriteFieldBegin(ctx, "isSigned", thrift.BOOL, 2); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:isSigned: ", p), err) }
-  if err := oprot.WriteBool(ctx, bool(p.IsSigned)); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T.isSigned (2) field write error: ", p), err) }
-  if err := oprot.WriteFieldEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:isSigned: ", p), err) }
-  return err
+	if err := oprot.WriteFieldBegin(ctx, "isSigned", thrift.BOOL, 2); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:isSigned: ", p), err)
+	}
+	if err := oprot.WriteBool(ctx, bool(p.IsSigned)); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T.isSigned (2) field write error: ", p), err)
+	}
+	if err := oprot.WriteFieldEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field end error 2:isSigned: ", p), err)
+	}
+	return err
 }

 func (p *IntType) Equals(other *IntType) bool {
-  if p == other {
-    return true
-  } else if p == nil || other == nil {
-    return false
-  }
-  if p.BitWidth != other.BitWidth { return false }
-  if p.IsSigned != other.IsSigned { return false }
-  return true
+	if p == other {
+		return true
+	} else if p == nil || other == nil {
+		return false
+	}
+	if p.BitWidth != other.BitWidth {
+		return false
+	}
+	if p.IsSigned != other.IsSigned {
+		return false
+	}
+	return true
 }

 func (p *IntType) String() string {
-  if p == nil {
-    return ""
-  }
-  return fmt.Sprintf("IntType(%+v)", *p)
+	if p == nil {
+		return ""
+	}
+	return fmt.Sprintf("IntType(%+v)", *p)
 }

 func (p *IntType) Validate() error {
-  return nil
+	return nil
 }
+
 // Embedded JSON logical type annotation
-// 
+//
 // Allowed for physical types: BINARY
 type JsonType struct {
 }

 func NewJsonType() *JsonType {
-  return &JsonType{}
+	return &JsonType{}
 }

 func (p *JsonType) Read(ctx context.Context, iprot thrift.TProtocol) error {
-  if _, err := iprot.ReadStructBegin(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
-  }
-
-
-  for {
-    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
-    if err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
-    }
-    if fieldTypeId == thrift.STOP { break; }
-    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-      return err
-    }
-    if err := iprot.ReadFieldEnd(ctx); err != nil {
-      return err
-    }
-  }
-  if err := iprot.ReadStructEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
-  }
-  return nil
+	if _, err := iprot.ReadStructBegin(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+	}
+
+	for {
+		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+		if err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+		}
+		if fieldTypeId == thrift.STOP {
+			break
+		}
+		if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+			return err
+		}
+		if err := iprot.ReadFieldEnd(ctx); err != nil {
+			return err
+		}
+	}
+	if err := iprot.ReadStructEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+	}
+	return nil
 }

 func (p *JsonType) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if err := oprot.WriteStructBegin(ctx, "JsonType"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if err := oprot.WriteStructBegin(ctx, "JsonType"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }

 func (p *JsonType) Equals(other *JsonType) bool {
-  if p == other {
-    return true
-  } else if p == nil || other == nil {
-    return false
-  }
-  return true
+	if p == other {
+		return true
+	} else if p == nil || other == nil {
+		return false
+	}
+	return true
 }

 func (p *JsonType) String() string {
-  if p == nil {
-    return ""
-  }
-  return fmt.Sprintf("JsonType(%+v)", *p)
+	if p == nil {
+		return ""
+	}
+	return fmt.Sprintf("JsonType(%+v)", *p)
 }

 func (p *JsonType) Validate() error {
-  return nil
+	return nil
 }
+
 // Embedded BSON logical type annotation
-// 
+//
 // Allowed for physical types: BINARY
 type BsonType struct {
 }

 func NewBsonType() *BsonType {
-  return &BsonType{}
+	return &BsonType{}
 }

 func (p *BsonType) Read(ctx context.Context, iprot thrift.TProtocol) error {
-  if _, err := iprot.ReadStructBegin(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
-  }
-
-
-  for {
-    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
-    if err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
-    }
-    if fieldTypeId == thrift.STOP { break; }
-    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-      return err
-    }
-    if err := iprot.ReadFieldEnd(ctx); err != nil {
-      return err
-    }
-  }
-  if err := iprot.ReadStructEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
-  }
-  return nil
+	if _, err := iprot.ReadStructBegin(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+	}
+
+	for {
+		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+		if err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+		}
+		if fieldTypeId == thrift.STOP {
+			break
+		}
+		if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+			return err
+		}
+		if err := iprot.ReadFieldEnd(ctx); err != nil {
+			return err
+		}
+	}
+	if err := iprot.ReadStructEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+	}
+	return nil
 }

 func (p *BsonType) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if err := oprot.WriteStructBegin(ctx, "BsonType"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if err := oprot.WriteStructBegin(ctx, "BsonType"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }

 func (p *BsonType) Equals(other *BsonType) bool {
-  if p == other {
-    return true
-  } else if p == nil || other == nil {
-    return false
-  }
-  return true
+	if p == other {
+		return true
+	} else if p == nil || other == nil {
+		return false
+	}
+	return true
 }

 func (p *BsonType) String() string {
-  if p == nil {
-    return ""
-  }
-  return fmt.Sprintf("BsonType(%+v)", *p)
+	if p == nil {
+		return ""
+	}
+	return fmt.Sprintf("BsonType(%+v)", *p)
 }

 func (p *BsonType) Validate() error {
-  return nil
+	return nil
 }
+
 // LogicalType annotations to replace ConvertedType.
-// 
+//
 // To maintain compatibility, implementations using LogicalType for a
 // SchemaElement must also set the corresponding ConvertedType (if any)
 // from the following table.
-// 
+//
 // Attributes:
-//  - STRING
-//  - MAP
-//  - LIST
-//  - ENUM
-//  - DECIMAL
-//  - DATE
-//  - TIME
-//  - TIMESTAMP
-//  - INTEGER
-//  - UNKNOWN
-//  - JSON
-//  - BSON
-//  - UUID
-//  - FLOAT16
+//   - STRING
+//   - MAP
+//   - LIST
+//   - ENUM
+//   - DECIMAL
+//   - DATE
+//   - TIME
+//   - TIMESTAMP
+//   - INTEGER
+//   - UNKNOWN
+//   - JSON
+//   - BSON
+//   - UUID
+//   - FLOAT16
 type LogicalType struct {
-  STRING *StringType `thrift:"STRING,1" db:"STRING" json:"STRING,omitempty"`
-  MAP *MapType `thrift:"MAP,2" db:"MAP" json:"MAP,omitempty"`
-  LIST *ListType `thrift:"LIST,3" db:"LIST" json:"LIST,omitempty"`
-  ENUM *EnumType `thrift:"ENUM,4" db:"ENUM" json:"ENUM,omitempty"`
-  DECIMAL *DecimalType `thrift:"DECIMAL,5" db:"DECIMAL" json:"DECIMAL,omitempty"`
-  DATE *DateType `thrift:"DATE,6" db:"DATE" json:"DATE,omitempty"`
-  TIME *TimeType `thrift:"TIME,7" db:"TIME" json:"TIME,omitempty"`
-  TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" db:"TIMESTAMP" json:"TIMESTAMP,omitempty"`
-  // unused field # 9
-  INTEGER *IntType `thrift:"INTEGER,10" db:"INTEGER" json:"INTEGER,omitempty"`
-  UNKNOWN *NullType `thrift:"UNKNOWN,11" db:"UNKNOWN" json:"UNKNOWN,omitempty"`
-  JSON *JsonType `thrift:"JSON,12" db:"JSON" json:"JSON,omitempty"`
-  BSON *BsonType `thrift:"BSON,13" db:"BSON" json:"BSON,omitempty"`
-  UUID *UUIDType `thrift:"UUID,14" db:"UUID" json:"UUID,omitempty"`
-  FLOAT16 *Float16Type `thrift:"FLOAT16,15" db:"FLOAT16" json:"FLOAT16,omitempty"`
+	STRING *StringType `thrift:"STRING,1" db:"STRING" json:"STRING,omitempty"`
+	MAP *MapType `thrift:"MAP,2" db:"MAP" json:"MAP,omitempty"`
+	LIST *ListType `thrift:"LIST,3" db:"LIST" json:"LIST,omitempty"`
+	ENUM *EnumType `thrift:"ENUM,4" db:"ENUM" json:"ENUM,omitempty"`
+	DECIMAL *DecimalType `thrift:"DECIMAL,5" db:"DECIMAL" json:"DECIMAL,omitempty"`
+	DATE *DateType `thrift:"DATE,6" db:"DATE" json:"DATE,omitempty"`
+	TIME *TimeType `thrift:"TIME,7" db:"TIME" json:"TIME,omitempty"`
+	TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" db:"TIMESTAMP" json:"TIMESTAMP,omitempty"`
+	// unused field # 9
+	INTEGER *IntType `thrift:"INTEGER,10" db:"INTEGER" json:"INTEGER,omitempty"`
+	UNKNOWN *NullType `thrift:"UNKNOWN,11" db:"UNKNOWN" json:"UNKNOWN,omitempty"`
+	JSON *JsonType `thrift:"JSON,12" db:"JSON" json:"JSON,omitempty"`
+	BSON *BsonType `thrift:"BSON,13" db:"BSON" json:"BSON,omitempty"`
+	UUID *UUIDType `thrift:"UUID,14" db:"UUID" json:"UUID,omitempty"`
+	FLOAT16 *Float16Type `thrift:"FLOAT16,15" db:"FLOAT16" json:"FLOAT16,omitempty"`
 }

 func NewLogicalType() *LogicalType {
-  return &LogicalType{}
+	return &LogicalType{}
 }

 var LogicalType_STRING_DEFAULT *StringType
+
 func (p *LogicalType) GetSTRING() *StringType {
-  if !p.IsSetSTRING() {
-    return LogicalType_STRING_DEFAULT
-  }
-return p.STRING
+	if !p.IsSetSTRING() {
+		return LogicalType_STRING_DEFAULT
+	}
+	return p.STRING
 }
+
 var LogicalType_MAP_DEFAULT *MapType
+
 func (p *LogicalType) GetMAP() *MapType {
-  if !p.IsSetMAP() {
-    return LogicalType_MAP_DEFAULT
-  }
-return p.MAP
+	if !p.IsSetMAP() {
+		return LogicalType_MAP_DEFAULT
+	}
+	return p.MAP
 }
+
 var LogicalType_LIST_DEFAULT *ListType
+
 func (p *LogicalType) GetLIST() *ListType {
-  if !p.IsSetLIST() {
-    return LogicalType_LIST_DEFAULT
-  }
-return p.LIST
+	if !p.IsSetLIST() {
+		return LogicalType_LIST_DEFAULT
+	}
+	return p.LIST
 }
+
 var LogicalType_ENUM_DEFAULT *EnumType
+
 func (p *LogicalType) GetENUM() *EnumType {
-  if !p.IsSetENUM() {
-    return LogicalType_ENUM_DEFAULT
-  }
-return p.ENUM
+	if !p.IsSetENUM() {
+		return LogicalType_ENUM_DEFAULT
+	}
+	return p.ENUM
 }
+
 var LogicalType_DECIMAL_DEFAULT *DecimalType
+
 func (p *LogicalType) GetDECIMAL() *DecimalType {
-  if !p.IsSetDECIMAL() {
-    return LogicalType_DECIMAL_DEFAULT
-  }
-return p.DECIMAL
+	if !p.IsSetDECIMAL() {
+		return LogicalType_DECIMAL_DEFAULT
+	}
+	return p.DECIMAL
 }
+
 var LogicalType_DATE_DEFAULT *DateType
+
 func (p *LogicalType) GetDATE() *DateType {
-  if !p.IsSetDATE() {
-    return LogicalType_DATE_DEFAULT
-  }
-return p.DATE
+	if !p.IsSetDATE() {
+		return LogicalType_DATE_DEFAULT
+	}
+	return p.DATE
 }
+
 var LogicalType_TIME_DEFAULT *TimeType
+
 func (p *LogicalType) GetTIME() *TimeType {
-  if !p.IsSetTIME() {
-    return LogicalType_TIME_DEFAULT
-  }
-return p.TIME
+	if !p.IsSetTIME() {
+		return LogicalType_TIME_DEFAULT
+	}
+	return p.TIME
 }
+
 var LogicalType_TIMESTAMP_DEFAULT *TimestampType
+
 func (p *LogicalType) GetTIMESTAMP() *TimestampType {
-  if !p.IsSetTIMESTAMP() {
-    return LogicalType_TIMESTAMP_DEFAULT
-  }
-return p.TIMESTAMP
+	if !p.IsSetTIMESTAMP() {
+		return LogicalType_TIMESTAMP_DEFAULT
+	}
+	return p.TIMESTAMP
 }
+
 var LogicalType_INTEGER_DEFAULT *IntType
+
 func (p *LogicalType) GetINTEGER() *IntType {
-  if !p.IsSetINTEGER() {
-    return LogicalType_INTEGER_DEFAULT
-  }
-return p.INTEGER
+	if !p.IsSetINTEGER() {
+		return LogicalType_INTEGER_DEFAULT
+	}
+	return p.INTEGER
 }
+
 var LogicalType_UNKNOWN_DEFAULT *NullType
+
 func (p *LogicalType) GetUNKNOWN() *NullType {
-  if !p.IsSetUNKNOWN() {
-    return LogicalType_UNKNOWN_DEFAULT
-  }
-return p.UNKNOWN
+	if !p.IsSetUNKNOWN() {
+		return LogicalType_UNKNOWN_DEFAULT
+	}
+	return p.UNKNOWN
 }
+
 var LogicalType_JSON_DEFAULT *JsonType
+
 func (p *LogicalType) GetJSON() *JsonType {
-  if !p.IsSetJSON() {
-    return LogicalType_JSON_DEFAULT
-  }
-return p.JSON
+	if !p.IsSetJSON() {
+		return LogicalType_JSON_DEFAULT
+	}
+	return p.JSON
 }
+
 var LogicalType_BSON_DEFAULT *BsonType
+
 func (p *LogicalType) GetBSON() *BsonType {
-  if !p.IsSetBSON() {
-    return LogicalType_BSON_DEFAULT
-  }
-return p.BSON
+	if !p.IsSetBSON() {
+		return LogicalType_BSON_DEFAULT
+	}
+	return p.BSON
 }
+
 var LogicalType_UUID_DEFAULT *UUIDType
+
 func (p *LogicalType) GetUUID() *UUIDType {
-  if !p.IsSetUUID() {
-    return LogicalType_UUID_DEFAULT
-  }
-return p.UUID
+	if !p.IsSetUUID() {
+		return LogicalType_UUID_DEFAULT
+	}
+	return p.UUID
 }
+
 var LogicalType_FLOAT16_DEFAULT *Float16Type
+
 func (p *LogicalType) GetFLOAT16() *Float16Type {
-  if !p.IsSetFLOAT16() {
-    return LogicalType_FLOAT16_DEFAULT
-  }
-return p.FLOAT16
+	if !p.IsSetFLOAT16() {
+		return LogicalType_FLOAT16_DEFAULT
+	}
+	return p.FLOAT16
 }

 func (p *LogicalType) CountSetFieldsLogicalType() int {
-  count := 0
-  if (p.IsSetSTRING()) {
-    count++
-  }
-  if (p.IsSetMAP()) {
-    count++
-  }
-  if (p.IsSetLIST()) {
-    count++
-  }
-  if (p.IsSetENUM()) {
-    count++
-  }
-  if (p.IsSetDECIMAL()) {
-    count++
-  }
-  if (p.IsSetDATE()) {
-    count++
-  }
-  if (p.IsSetTIME()) {
-    count++
-  }
-  if (p.IsSetTIMESTAMP()) {
-    count++
-  }
-  if (p.IsSetINTEGER()) {
-    count++
-  }
-  if (p.IsSetUNKNOWN()) {
-    count++
-  }
-  if (p.IsSetJSON()) {
-    count++
-  }
-  if (p.IsSetBSON()) {
-    count++
-  }
-  if (p.IsSetUUID()) {
-    count++
-  }
-  if (p.IsSetFLOAT16()) {
-    count++
-  }
-  return count
+	count := 0
+	if p.IsSetSTRING() {
+		count++
+	}
+	if p.IsSetMAP() {
+		count++
+	}
+	if p.IsSetLIST() {
+		count++
+	}
+	if p.IsSetENUM() {
+		count++
+	}
+	if p.IsSetDECIMAL() {
+		count++
+	}
+	if p.IsSetDATE() {
+		count++
+	}
+	if p.IsSetTIME() {
+		count++
+	}
+	if p.IsSetTIMESTAMP() {
+		count++
+	}
+	if p.IsSetINTEGER() {
+		count++
+	}
+	if p.IsSetUNKNOWN() {
+		count++
+	}
+	if p.IsSetJSON() {
+		count++
+	}
+	if p.IsSetBSON() {
+		count++
+	}
+	if p.IsSetUUID() {
+		count++
+	}
+	if p.IsSetFLOAT16() {
+		count++
+	}
+	return count
 }

 func (p *LogicalType) IsSetSTRING() bool {
-  return p.STRING != nil
+	return p.STRING != nil
 }

 func (p *LogicalType) IsSetMAP() bool {
-  return p.MAP != nil
+	return p.MAP != nil
 }

 func (p *LogicalType) IsSetLIST() bool {
-  return p.LIST != nil
+	return p.LIST != nil
 }

 func (p *LogicalType) IsSetENUM() bool {
-  return p.ENUM != nil
+	return p.ENUM != nil
 }

 func (p *LogicalType) IsSetDECIMAL() bool {
-  return p.DECIMAL != nil
+	return p.DECIMAL != nil
 }

 func (p *LogicalType) IsSetDATE() bool {
-  return p.DATE != nil
+	return p.DATE != nil
 }

 func (p *LogicalType) IsSetTIME() bool {
-  return p.TIME != nil
+	return p.TIME != nil
 }

 func (p *LogicalType) IsSetTIMESTAMP() bool {
-  return p.TIMESTAMP != nil
+	return p.TIMESTAMP != nil
 }

 func (p *LogicalType) IsSetINTEGER() bool {
-  return p.INTEGER != nil
+	return p.INTEGER != nil
 }

 func (p *LogicalType) IsSetUNKNOWN() bool {
-  return p.UNKNOWN != nil
+	return p.UNKNOWN != nil
 }

 func (p *LogicalType) IsSetJSON() bool {
-  return p.JSON != nil
+	return p.JSON != nil
 }

 func (p *LogicalType) IsSetBSON() bool {
-  return p.BSON != nil
+	return p.BSON != nil
 }

 func (p *LogicalType) IsSetUUID() bool {
-  return p.UUID != nil
+	return p.UUID != nil
 }

 func (p *LogicalType) IsSetFLOAT16() bool {
-  return p.FLOAT16 != nil
+	return p.FLOAT16 != nil
 }

 func (p *LogicalType) Read(ctx context.Context, iprot thrift.TProtocol) error {
-  if _, err := iprot.ReadStructBegin(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
-  }
-
-
-  for {
-    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
-    if err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
-    }
-    if fieldTypeId == thrift.STOP { break; }
-    switch fieldId {
-    case 1:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField1(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 2:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField2(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 3:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField3(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 4:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField4(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 5:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField5(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 6:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField6(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 7:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField7(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 8:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField8(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 10:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField10(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 11:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField11(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 12:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField12(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 13:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField13(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 14:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField14(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 15:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField15(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    default:
-      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-        return err
-      }
-    }
-    if err := iprot.ReadFieldEnd(ctx); err != nil {
-      return err
-    }
-  }
-  if err := iprot.ReadStructEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
-  p.STRING = &StringType{}
-  if err := p.STRING.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.STRING), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
-  p.MAP = &MapType{}
-  if err := p.MAP.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MAP), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
-  p.LIST = &ListType{}
-  if err := p.LIST.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LIST), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
-  p.ENUM = &EnumType{}
-  if err := p.ENUM.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENUM), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
-  p.DECIMAL = &DecimalType{}
-  if err := p.DECIMAL.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DECIMAL), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
-  p.DATE = &DateType{}
-  if err := p.DATE.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DATE), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
-  p.TIME = &TimeType{}
-  if err := p.TIME.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIME), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
-  p.TIMESTAMP = &TimestampType{}
-  if err := p.TIMESTAMP.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIMESTAMP), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField10(ctx context.Context, iprot thrift.TProtocol) error {
-  p.INTEGER = &IntType{}
-  if err := p.INTEGER.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.INTEGER), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField11(ctx context.Context, iprot thrift.TProtocol) error {
-  p.UNKNOWN = &NullType{}
-  if err := p.UNKNOWN.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNKNOWN), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField12(ctx context.Context, iprot thrift.TProtocol) error {
-  p.JSON = &JsonType{}
-  if err := p.JSON.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.JSON), err)
-  }
-  return nil
-}
-
-func (p *LogicalType) ReadField13(ctx context.Context, iprot thrift.TProtocol) error {
-  p.BSON = &BsonType{}
-  if err := p.BSON.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BSON), err)
-  }
-  return nil
+	if _, err := iprot.ReadStructBegin(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+	}
+
+	for {
+		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+		if err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+		}
+		if fieldTypeId == thrift.STOP {
+			break
+		}
+		switch fieldId {
+		case 1:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField1(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 2:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField2(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 3:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField3(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 4:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField4(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 5:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField5(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 6:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField6(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 7:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField7(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 8:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField8(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 10:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField10(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 11:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField11(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 12:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField12(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 13:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField13(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 14:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField14(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 15:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField15(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		default:
+			if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+				return err
+			}
+		}
+		if err := iprot.ReadFieldEnd(ctx); err != nil {
+			return err
+		}
+	}
+	if err := iprot.ReadStructEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+	p.STRING = &StringType{}
+	if err := p.STRING.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.STRING), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+	p.MAP = &MapType{}
+	if err := p.MAP.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MAP), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+	p.LIST = &ListType{}
+	if err := p.LIST.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LIST), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+	p.ENUM = &EnumType{}
+	if err := p.ENUM.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENUM), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+	p.DECIMAL = &DecimalType{}
+	if err := p.DECIMAL.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DECIMAL), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+	p.DATE = &DateType{}
+	if err := p.DATE.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DATE), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+	p.TIME = &TimeType{}
+	if err := p.TIME.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIME), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
+	p.TIMESTAMP = &TimestampType{}
+	if err := p.TIMESTAMP.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIMESTAMP), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField10(ctx context.Context, iprot thrift.TProtocol) error {
+	p.INTEGER = &IntType{}
+	if err := p.INTEGER.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.INTEGER), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField11(ctx context.Context, iprot thrift.TProtocol) error {
+	p.UNKNOWN = &NullType{}
+	if err := p.UNKNOWN.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNKNOWN), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField12(ctx context.Context, iprot thrift.TProtocol) error {
+	p.JSON = &JsonType{}
+	if err := p.JSON.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.JSON), err)
+	}
+	return nil
+}
+
+func (p *LogicalType) ReadField13(ctx context.Context, iprot thrift.TProtocol) error {
+	p.BSON = &BsonType{}
+	if err := p.BSON.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BSON), err)
+	}
+	return nil
 }

-func (p *LogicalType) ReadField14(ctx context.Context, iprot thrift.TProtocol) error {
-  p.UUID = &UUIDType{}
-  if err := p.UUID.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UUID), err)
-  }
-  return nil
+func (p *LogicalType) ReadField14(ctx context.Context, iprot thrift.TProtocol) error {
+	p.UUID = &UUIDType{}
+	if err := p.UUID.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UUID), err)
+	}
+	return nil
 }

-func (p *LogicalType) ReadField15(ctx context.Context, iprot thrift.TProtocol) error {
-  p.FLOAT16 = &Float16Type{}
-  if err := p.FLOAT16.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.FLOAT16), err)
-  }
-  return nil
+func (p *LogicalType) ReadField15(ctx context.Context, iprot thrift.TProtocol) error {
+	p.FLOAT16 = &Float16Type{}
+	if err := p.FLOAT16.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.FLOAT16), err)
+	}
+	return nil
 }

 func (p *LogicalType) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if c := p.CountSetFieldsLogicalType(); c != 1 {
-    return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c)
-  }
-  if err := oprot.WriteStructBegin(ctx, "LogicalType"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-    if err := p.writeField1(ctx, oprot); err != nil { return err }
-    if err := p.writeField2(ctx, oprot); err != nil { return err }
-    if err := p.writeField3(ctx, oprot); err != nil { return err }
-    if err := p.writeField4(ctx, oprot); err != nil { return err }
-    if err := p.writeField5(ctx, oprot); err != nil { return err }
-    if err := p.writeField6(ctx, oprot); err != nil { return err }
-    if err := p.writeField7(ctx, oprot); err != nil { return err }
-    if err := p.writeField8(ctx, oprot); err != nil { return err }
-    if err := p.writeField10(ctx, oprot); err != nil { return err }
-    if err := p.writeField11(ctx, oprot); err != nil { return err }
-    if err := p.writeField12(ctx, oprot); err != nil { return err }
-    if err := p.writeField13(ctx, oprot); err != nil { return err }
-    if err := p.writeField14(ctx, oprot); err != nil { return err }
-    if err := p.writeField15(ctx, oprot); err != nil { return err }
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if c := p.CountSetFieldsLogicalType(); c != 1 {
+		return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c)
+	}
+	if err := oprot.WriteStructBegin(ctx, "LogicalType"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+		if err := p.writeField1(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField2(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField3(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField4(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField5(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField6(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField7(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField8(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField10(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField11(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField12(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField13(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField14(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField15(ctx, oprot); err != nil {
+			return err
+		}
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }

 func (p *LogicalType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetSTRING() {
-    if err := oprot.WriteFieldBegin(ctx, "STRING", thrift.STRUCT, 1); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:STRING: ", p), err) }
-    if err := p.STRING.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.STRING), err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:STRING: ", p), err) }
-  }
-  return err
+	if p.IsSetSTRING() {
+		if err := oprot.WriteFieldBegin(ctx, "STRING", thrift.STRUCT, 1); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:STRING: ", p), err)
+		}
+		if err := p.STRING.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.STRING), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 1:STRING: ", p), err)
+		}
+	}
+	return err
 }

 func (p *LogicalType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetMAP() {
-    if err := oprot.WriteFieldBegin(ctx, "MAP", thrift.STRUCT, 2); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MAP: ", p), err) }
-    if err := p.MAP.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MAP), err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MAP: ", p), err) }
-  }
-  return err
+	if p.IsSetMAP() {
+		if err := oprot.WriteFieldBegin(ctx, "MAP", thrift.STRUCT, 2); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MAP: ", p), err)
+		}
+		if err := p.MAP.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MAP), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MAP: ", p), err)
+		}
+	}
+	return err
 }

 func (p *LogicalType) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetLIST() {
-    if err := oprot.WriteFieldBegin(ctx, "LIST", thrift.STRUCT, 3); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:LIST: ", p), err) }
-    if err := p.LIST.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LIST), err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 3:LIST: ", p), err) }
-  }
-  return err
+	if p.IsSetLIST() {
+		if err := oprot.WriteFieldBegin(ctx, "LIST", thrift.STRUCT, 3); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:LIST: ", p), err)
+		}
+		if err := p.LIST.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LIST), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 3:LIST: ", p), err)
+		}
+	}
+	return err
 }

 func (p *LogicalType) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetENUM() {
-    if err := oprot.WriteFieldBegin(ctx, "ENUM", thrift.STRUCT, 4); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:ENUM: ", p), err) }
-    if err := p.ENUM.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENUM), err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 4:ENUM: ", p), err) }
-  }
-  return err
+	if p.IsSetENUM() {
+		if err := oprot.WriteFieldBegin(ctx, "ENUM", thrift.STRUCT, 4); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:ENUM: ", p), err)
+		}
+		if err := p.ENUM.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENUM), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 4:ENUM: ", p), err)
+		}
+	}
+	return err
 }

 func (p *LogicalType) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetDECIMAL() {
-    if err := oprot.WriteFieldBegin(ctx, "DECIMAL", thrift.STRUCT, 5); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write
field end error 5:DECIMAL: ", p), err) } - } - return err + if p.IsSetDECIMAL() { + if err := oprot.WriteFieldBegin(ctx, "DECIMAL", thrift.STRUCT, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:DECIMAL: ", p), err) + } + if err := p.DECIMAL.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DECIMAL), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:DECIMAL: ", p), err) + } + } + return err } func (p *LogicalType) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDATE() { - if err := oprot.WriteFieldBegin(ctx, "DATE", thrift.STRUCT, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:DATE: ", p), err) } - if err := p.DATE.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DATE), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:DATE: ", p), err) } - } - return err + if p.IsSetDATE() { + if err := oprot.WriteFieldBegin(ctx, "DATE", thrift.STRUCT, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:DATE: ", p), err) + } + if err := p.DATE.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DATE), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:DATE: ", p), err) + } + } + return err } func (p *LogicalType) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetTIME() { - if err := oprot.WriteFieldBegin(ctx, "TIME", thrift.STRUCT, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:TIME: ", p), err) } - if err := p.TIME.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIME), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:TIME: ", p), err) } - } - return err + if p.IsSetTIME() { + if err := oprot.WriteFieldBegin(ctx, "TIME", thrift.STRUCT, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:TIME: ", p), err) + } + if err := p.TIME.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIME), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:TIME: ", p), err) + } + } + return err } func (p *LogicalType) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetTIMESTAMP() { - if err := oprot.WriteFieldBegin(ctx, "TIMESTAMP", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:TIMESTAMP: ", p), err) } - if err := p.TIMESTAMP.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIMESTAMP), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:TIMESTAMP: ", p), err) } - } - return err + if p.IsSetTIMESTAMP() { + if err := oprot.WriteFieldBegin(ctx, "TIMESTAMP", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:TIMESTAMP: ", p), err) + } + if err := 
p.TIMESTAMP.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIMESTAMP), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:TIMESTAMP: ", p), err) + } + } + return err } func (p *LogicalType) writeField10(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetINTEGER() { - if err := oprot.WriteFieldBegin(ctx, "INTEGER", thrift.STRUCT, 10); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:INTEGER: ", p), err) } - if err := p.INTEGER.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.INTEGER), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 10:INTEGER: ", p), err) } - } - return err + if p.IsSetINTEGER() { + if err := oprot.WriteFieldBegin(ctx, "INTEGER", thrift.STRUCT, 10); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:INTEGER: ", p), err) + } + if err := p.INTEGER.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.INTEGER), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 10:INTEGER: ", p), err) + } + } + return err } func (p *LogicalType) writeField11(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetUNKNOWN() { - if err := oprot.WriteFieldBegin(ctx, "UNKNOWN", thrift.STRUCT, 11); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:UNKNOWN: ", p), err) } - if err := p.UNKNOWN.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNKNOWN), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 11:UNKNOWN: ", p), err) } - } - return err + if p.IsSetUNKNOWN() { + if err := oprot.WriteFieldBegin(ctx, "UNKNOWN", thrift.STRUCT, 11); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:UNKNOWN: ", p), err) + } + if err := p.UNKNOWN.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNKNOWN), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 11:UNKNOWN: ", p), err) + } + } + return err } func (p *LogicalType) writeField12(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetJSON() { - if err := oprot.WriteFieldBegin(ctx, "JSON", thrift.STRUCT, 12); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:JSON: ", p), err) } - if err := p.JSON.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.JSON), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 12:JSON: ", p), err) } - } - return err + if p.IsSetJSON() { + if err := oprot.WriteFieldBegin(ctx, "JSON", thrift.STRUCT, 12); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:JSON: ", p), err) + } + if err := p.JSON.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.JSON), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write 
field end error 12:JSON: ", p), err) + } + } + return err } func (p *LogicalType) writeField13(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetBSON() { - if err := oprot.WriteFieldBegin(ctx, "BSON", thrift.STRUCT, 13); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:BSON: ", p), err) } - if err := p.BSON.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.BSON), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 13:BSON: ", p), err) } - } - return err + if p.IsSetBSON() { + if err := oprot.WriteFieldBegin(ctx, "BSON", thrift.STRUCT, 13); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:BSON: ", p), err) + } + if err := p.BSON.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.BSON), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 13:BSON: ", p), err) + } + } + return err } func (p *LogicalType) writeField14(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetUUID() { - if err := oprot.WriteFieldBegin(ctx, "UUID", thrift.STRUCT, 14); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 14:UUID: ", p), err) } - if err := p.UUID.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UUID), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 14:UUID: ", p), err) } - } - return err + if p.IsSetUUID() { + if err := oprot.WriteFieldBegin(ctx, "UUID", thrift.STRUCT, 14); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 14:UUID: ", p), err) + } + if err := p.UUID.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UUID), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 14:UUID: ", p), err) + } + } + return err } func (p *LogicalType) writeField15(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetFLOAT16() { - if err := oprot.WriteFieldBegin(ctx, "FLOAT16", thrift.STRUCT, 15); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 15:FLOAT16: ", p), err) } - if err := p.FLOAT16.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.FLOAT16), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 15:FLOAT16: ", p), err) } - } - return err + if p.IsSetFLOAT16() { + if err := oprot.WriteFieldBegin(ctx, "FLOAT16", thrift.STRUCT, 15); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 15:FLOAT16: ", p), err) + } + if err := p.FLOAT16.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.FLOAT16), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 15:FLOAT16: ", p), err) + } + } + return err } func (p *LogicalType) Equals(other *LogicalType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.STRING.Equals(other.STRING) { return false } - if 
!p.MAP.Equals(other.MAP) { return false } - if !p.LIST.Equals(other.LIST) { return false } - if !p.ENUM.Equals(other.ENUM) { return false } - if !p.DECIMAL.Equals(other.DECIMAL) { return false } - if !p.DATE.Equals(other.DATE) { return false } - if !p.TIME.Equals(other.TIME) { return false } - if !p.TIMESTAMP.Equals(other.TIMESTAMP) { return false } - if !p.INTEGER.Equals(other.INTEGER) { return false } - if !p.UNKNOWN.Equals(other.UNKNOWN) { return false } - if !p.JSON.Equals(other.JSON) { return false } - if !p.BSON.Equals(other.BSON) { return false } - if !p.UUID.Equals(other.UUID) { return false } - if !p.FLOAT16.Equals(other.FLOAT16) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.STRING.Equals(other.STRING) { + return false + } + if !p.MAP.Equals(other.MAP) { + return false + } + if !p.LIST.Equals(other.LIST) { + return false + } + if !p.ENUM.Equals(other.ENUM) { + return false + } + if !p.DECIMAL.Equals(other.DECIMAL) { + return false + } + if !p.DATE.Equals(other.DATE) { + return false + } + if !p.TIME.Equals(other.TIME) { + return false + } + if !p.TIMESTAMP.Equals(other.TIMESTAMP) { + return false + } + if !p.INTEGER.Equals(other.INTEGER) { + return false + } + if !p.UNKNOWN.Equals(other.UNKNOWN) { + return false + } + if !p.JSON.Equals(other.JSON) { + return false + } + if !p.BSON.Equals(other.BSON) { + return false + } + if !p.UUID.Equals(other.UUID) { + return false + } + if !p.FLOAT16.Equals(other.FLOAT16) { + return false + } + return true } func (p *LogicalType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("LogicalType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("LogicalType(%+v)", *p) } func (p *LogicalType) Validate() error { - return nil + return nil } + // Represents a element inside a schema definition. -// - if it is a group (inner node) then type is undefined and num_children is defined -// - if it is a primitive type (leaf) then type is defined and num_children is undefined +// - if it is a group (inner node) then type is undefined and num_children is defined +// - if it is a primitive type (leaf) then type is defined and num_children is undefined +// // the nodes are listed in depth first traversal order. -// +// // Attributes: -// - Type: Data type for this field. Not set if the current element is a non-leaf node -// - TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values. +// - Type: Data type for this field. Not set if the current element is a non-leaf node +// - TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values. +// // Otherwise, if specified, this is the maximum bit length to store any of the values. // (e.g. a low cardinality INT col could have this set to 3). Note that this is // in the schema, and therefore fixed for the entire file. -// - RepetitionType: repetition of the field. The root of the schema does not have a repetition_type. +// - RepetitionType: repetition of the field. The root of the schema does not have a repetition_type. +// // All other nodes must have one -// - Name: Name of the field in the schema -// - NumChildren: Nested fields. Since thrift does not support nested fields, +// - Name: Name of the field in the schema +// - NumChildren: Nested fields. Since thrift does not support nested fields, +// // the nesting is flattened to a single list by a depth-first traversal. // The children count is used to construct the nested relationship. 
// This field is not set when the element is a primitive type -// - ConvertedType: DEPRECATED: When the schema is the result of a conversion from another model. +// - ConvertedType: DEPRECATED: When the schema is the result of a conversion from another model. +// // Used to record the original type to help with cross conversion. -// +// // This is superseded by logicalType. -// - Scale: DEPRECATED: Used when this column contains decimal data. +// - Scale: DEPRECATED: Used when this column contains decimal data. +// // See the DECIMAL converted type for more details. -// +// // This is superseded by using the DecimalType annotation in logicalType. -// - Precision -// - FieldID: When the original schema supports field ids, this will save the +// - Precision +// - FieldID: When the original schema supports field ids, this will save the +// // original field id in the parquet schema -// - LogicalType: The logical type of this SchemaElement -// +// - LogicalType: The logical type of this SchemaElement +// // LogicalType replaces ConvertedType, but ConvertedType is still required // for some logical types to ensure forward-compatibility in format v1. type SchemaElement struct { - Type *Type `thrift:"type,1" db:"type" json:"type,omitempty"` - TypeLength *int32 `thrift:"type_length,2" db:"type_length" json:"type_length,omitempty"` - RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" db:"repetition_type" json:"repetition_type,omitempty"` - Name string `thrift:"name,4,required" db:"name" json:"name"` - NumChildren *int32 `thrift:"num_children,5" db:"num_children" json:"num_children,omitempty"` - ConvertedType *ConvertedType `thrift:"converted_type,6" db:"converted_type" json:"converted_type,omitempty"` - Scale *int32 `thrift:"scale,7" db:"scale" json:"scale,omitempty"` - Precision *int32 `thrift:"precision,8" db:"precision" json:"precision,omitempty"` - FieldID *int32 `thrift:"field_id,9" db:"field_id" json:"field_id,omitempty"` - LogicalType *LogicalType `thrift:"logicalType,10" db:"logicalType" json:"logicalType,omitempty"` + Type *Type `thrift:"type,1" db:"type" json:"type,omitempty"` + TypeLength *int32 `thrift:"type_length,2" db:"type_length" json:"type_length,omitempty"` + RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" db:"repetition_type" json:"repetition_type,omitempty"` + Name string `thrift:"name,4,required" db:"name" json:"name"` + NumChildren *int32 `thrift:"num_children,5" db:"num_children" json:"num_children,omitempty"` + ConvertedType *ConvertedType `thrift:"converted_type,6" db:"converted_type" json:"converted_type,omitempty"` + Scale *int32 `thrift:"scale,7" db:"scale" json:"scale,omitempty"` + Precision *int32 `thrift:"precision,8" db:"precision" json:"precision,omitempty"` + FieldID *int32 `thrift:"field_id,9" db:"field_id" json:"field_id,omitempty"` + LogicalType *LogicalType `thrift:"logicalType,10" db:"logicalType" json:"logicalType,omitempty"` } func NewSchemaElement() *SchemaElement { - return &SchemaElement{} + return &SchemaElement{} } var SchemaElement_Type_DEFAULT Type + func (p *SchemaElement) GetType() Type { - if !p.IsSetType() { - return SchemaElement_Type_DEFAULT - } -return *p.Type + if !p.IsSetType() { + return SchemaElement_Type_DEFAULT + } + return *p.Type } + var SchemaElement_TypeLength_DEFAULT int32 + func (p *SchemaElement) GetTypeLength() int32 { - if !p.IsSetTypeLength() { - return SchemaElement_TypeLength_DEFAULT - } -return *p.TypeLength + if !p.IsSetTypeLength() { + return SchemaElement_TypeLength_DEFAULT + } + return 
*p.TypeLength } + var SchemaElement_RepetitionType_DEFAULT FieldRepetitionType + func (p *SchemaElement) GetRepetitionType() FieldRepetitionType { - if !p.IsSetRepetitionType() { - return SchemaElement_RepetitionType_DEFAULT - } -return *p.RepetitionType + if !p.IsSetRepetitionType() { + return SchemaElement_RepetitionType_DEFAULT + } + return *p.RepetitionType } func (p *SchemaElement) GetName() string { - return p.Name + return p.Name } + var SchemaElement_NumChildren_DEFAULT int32 + func (p *SchemaElement) GetNumChildren() int32 { - if !p.IsSetNumChildren() { - return SchemaElement_NumChildren_DEFAULT - } -return *p.NumChildren + if !p.IsSetNumChildren() { + return SchemaElement_NumChildren_DEFAULT + } + return *p.NumChildren } + var SchemaElement_ConvertedType_DEFAULT ConvertedType + func (p *SchemaElement) GetConvertedType() ConvertedType { - if !p.IsSetConvertedType() { - return SchemaElement_ConvertedType_DEFAULT - } -return *p.ConvertedType + if !p.IsSetConvertedType() { + return SchemaElement_ConvertedType_DEFAULT + } + return *p.ConvertedType } + var SchemaElement_Scale_DEFAULT int32 + func (p *SchemaElement) GetScale() int32 { - if !p.IsSetScale() { - return SchemaElement_Scale_DEFAULT - } -return *p.Scale + if !p.IsSetScale() { + return SchemaElement_Scale_DEFAULT + } + return *p.Scale } + var SchemaElement_Precision_DEFAULT int32 + func (p *SchemaElement) GetPrecision() int32 { - if !p.IsSetPrecision() { - return SchemaElement_Precision_DEFAULT - } -return *p.Precision + if !p.IsSetPrecision() { + return SchemaElement_Precision_DEFAULT + } + return *p.Precision } + var SchemaElement_FieldID_DEFAULT int32 + func (p *SchemaElement) GetFieldID() int32 { - if !p.IsSetFieldID() { - return SchemaElement_FieldID_DEFAULT - } -return *p.FieldID + if !p.IsSetFieldID() { + return SchemaElement_FieldID_DEFAULT + } + return *p.FieldID } + var SchemaElement_LogicalType_DEFAULT *LogicalType + func (p *SchemaElement) GetLogicalType() *LogicalType { - if !p.IsSetLogicalType() { - return SchemaElement_LogicalType_DEFAULT - } -return p.LogicalType + if !p.IsSetLogicalType() { + return SchemaElement_LogicalType_DEFAULT + } + return p.LogicalType } func (p *SchemaElement) IsSetType() bool { - return p.Type != nil + return p.Type != nil } func (p *SchemaElement) IsSetTypeLength() bool { - return p.TypeLength != nil + return p.TypeLength != nil } func (p *SchemaElement) IsSetRepetitionType() bool { - return p.RepetitionType != nil + return p.RepetitionType != nil } func (p *SchemaElement) IsSetNumChildren() bool { - return p.NumChildren != nil + return p.NumChildren != nil } func (p *SchemaElement) IsSetConvertedType() bool { - return p.ConvertedType != nil + return p.ConvertedType != nil } func (p *SchemaElement) IsSetScale() bool { - return p.Scale != nil + return p.Scale != nil } func (p *SchemaElement) IsSetPrecision() bool { - return p.Precision != nil + return p.Precision != nil } func (p *SchemaElement) IsSetFieldID() bool { - return p.FieldID != nil + return p.FieldID != nil } func (p *SchemaElement) IsSetLogicalType() bool { - return p.LogicalType != nil + return p.LogicalType != nil } func (p *SchemaElement) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetName bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), 
err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I32 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.STRING { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetName = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.I32 { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.I32 { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.I32 { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 8: - if fieldTypeId == thrift.I32 { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 9: - if fieldTypeId == thrift.I32 { - if err := p.ReadField9(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 10: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField10(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetName{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Name is not set")); - } - return nil -} - -func (p *SchemaElement) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - temp := Type(v) - p.Type = &temp -} - return nil -} - -func (p *SchemaElement) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.TypeLength = &v -} - return nil -} - -func (p *SchemaElement) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - temp := FieldRepetitionType(v) - p.RepetitionType = &temp -} - return nil -} - -func (p *SchemaElement) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - p.Name = v -} - 
return nil -} - -func (p *SchemaElement) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 5: ", err) -} else { - p.NumChildren = &v -} - return nil -} - -func (p *SchemaElement) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - temp := ConvertedType(v) - p.ConvertedType = &temp -} - return nil -} - -func (p *SchemaElement) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 7: ", err) -} else { - p.Scale = &v -} - return nil -} - -func (p *SchemaElement) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 8: ", err) -} else { - p.Precision = &v -} - return nil -} - -func (p *SchemaElement) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 9: ", err) -} else { - p.FieldID = &v -} - return nil -} - -func (p *SchemaElement) ReadField10(ctx context.Context, iprot thrift.TProtocol) error { - p.LogicalType = &LogicalType{} - if err := p.LogicalType.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LogicalType), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetName bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I32 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.STRING { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetName = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.I32 { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.I32 { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.I32 { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.I32 { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err 
!= nil { + return err + } + } + case 9: + if fieldTypeId == thrift.I32 { + if err := p.ReadField9(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 10: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField10(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetName { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Name is not set")) + } + return nil +} + +func (p *SchemaElement) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := Type(v) + p.Type = &temp + } + return nil +} + +func (p *SchemaElement) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.TypeLength = &v + } + return nil +} + +func (p *SchemaElement) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + temp := FieldRepetitionType(v) + p.RepetitionType = &temp + } + return nil +} + +func (p *SchemaElement) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + p.Name = v + } + return nil +} + +func (p *SchemaElement) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.NumChildren = &v + } + return nil +} + +func (p *SchemaElement) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + temp := ConvertedType(v) + p.ConvertedType = &temp + } + return nil +} + +func (p *SchemaElement) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 7: ", err) + } else { + p.Scale = &v + } + return nil +} + +func (p *SchemaElement) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 8: ", err) + } else { + p.Precision = &v + } + return nil +} + +func (p *SchemaElement) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 9: ", err) + } else { + p.FieldID = &v + } + return nil +} + +func (p *SchemaElement) ReadField10(ctx context.Context, iprot thrift.TProtocol) error { + p.LogicalType = &LogicalType{} + if err := p.LogicalType.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LogicalType), err) + } + return nil } func (p *SchemaElement) Write(ctx context.Context, 
oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "SchemaElement"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - if err := p.writeField9(ctx, oprot); err != nil { return err } - if err := p.writeField10(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "SchemaElement"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + if err := p.writeField9(ctx, oprot); err != nil { + return err + } + if err := p.writeField10(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *SchemaElement) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetType() { - if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.Type)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) } - } - return err + if p.IsSetType() { + if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.Type)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) + } + } + return err } func (p *SchemaElement) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetTypeLength() { - if err := oprot.WriteFieldBegin(ctx, "type_length", thrift.I32, 2); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 2:type_length: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.TypeLength)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type_length (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:type_length: ", p), err) } - } - return err + if p.IsSetTypeLength() { + if err := oprot.WriteFieldBegin(ctx, "type_length", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:type_length: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.TypeLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type_length (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:type_length: ", p), err) + } + } + return err } func (p *SchemaElement) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetRepetitionType() { - if err := oprot.WriteFieldBegin(ctx, "repetition_type", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:repetition_type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.RepetitionType)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.repetition_type (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:repetition_type: ", p), err) } - } - return err + if p.IsSetRepetitionType() { + if err := oprot.WriteFieldBegin(ctx, "repetition_type", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:repetition_type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.RepetitionType)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.repetition_type (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:repetition_type: ", p), err) + } + } + return err } func (p *SchemaElement) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "name", thrift.STRING, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:name: ", p), err) } - if err := oprot.WriteString(ctx, string(p.Name)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.name (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:name: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "name", thrift.STRING, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:name: ", p), err) + } + if err := oprot.WriteString(ctx, string(p.Name)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.name (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:name: ", p), err) + } + return err } func (p *SchemaElement) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetNumChildren() { - if err := oprot.WriteFieldBegin(ctx, "num_children", thrift.I32, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_children: ", p), err) } - if err := 
oprot.WriteI32(ctx, int32(*p.NumChildren)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_children (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_children: ", p), err) } - } - return err + if p.IsSetNumChildren() { + if err := oprot.WriteFieldBegin(ctx, "num_children", thrift.I32, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_children: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.NumChildren)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_children (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_children: ", p), err) + } + } + return err } func (p *SchemaElement) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetConvertedType() { - if err := oprot.WriteFieldBegin(ctx, "converted_type", thrift.I32, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:converted_type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.ConvertedType)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.converted_type (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:converted_type: ", p), err) } - } - return err + if p.IsSetConvertedType() { + if err := oprot.WriteFieldBegin(ctx, "converted_type", thrift.I32, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:converted_type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.ConvertedType)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.converted_type (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:converted_type: ", p), err) + } + } + return err } func (p *SchemaElement) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetScale() { - if err := oprot.WriteFieldBegin(ctx, "scale", thrift.I32, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:scale: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.Scale)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.scale (7) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:scale: ", p), err) } - } - return err + if p.IsSetScale() { + if err := oprot.WriteFieldBegin(ctx, "scale", thrift.I32, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:scale: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.Scale)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.scale (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:scale: ", p), err) + } + } + return err } func (p *SchemaElement) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetPrecision() { - if err := oprot.WriteFieldBegin(ctx, "precision", thrift.I32, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:precision: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.Precision)); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T.precision (8) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:precision: ", p), err) } - } - return err + if p.IsSetPrecision() { + if err := oprot.WriteFieldBegin(ctx, "precision", thrift.I32, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:precision: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.Precision)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.precision (8) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:precision: ", p), err) + } + } + return err } func (p *SchemaElement) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetFieldID() { - if err := oprot.WriteFieldBegin(ctx, "field_id", thrift.I32, 9); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:field_id: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.FieldID)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.field_id (9) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 9:field_id: ", p), err) } - } - return err + if p.IsSetFieldID() { + if err := oprot.WriteFieldBegin(ctx, "field_id", thrift.I32, 9); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:field_id: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.FieldID)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.field_id (9) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 9:field_id: ", p), err) + } + } + return err } func (p *SchemaElement) writeField10(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetLogicalType() { - if err := oprot.WriteFieldBegin(ctx, "logicalType", thrift.STRUCT, 10); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:logicalType: ", p), err) } - if err := p.LogicalType.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LogicalType), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 10:logicalType: ", p), err) } - } - return err + if p.IsSetLogicalType() { + if err := oprot.WriteFieldBegin(ctx, "logicalType", thrift.STRUCT, 10); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:logicalType: ", p), err) + } + if err := p.LogicalType.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LogicalType), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 10:logicalType: ", p), err) + } + } + return err } func (p *SchemaElement) Equals(other *SchemaElement) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Type != other.Type { - if p.Type == nil || other.Type == nil { - return false - } - if (*p.Type) != (*other.Type) { return false } - } - if p.TypeLength != other.TypeLength { - if p.TypeLength == nil || other.TypeLength == nil { - return false - } - if (*p.TypeLength) != (*other.TypeLength) { return false } - } - if 
p.RepetitionType != other.RepetitionType { - if p.RepetitionType == nil || other.RepetitionType == nil { - return false - } - if (*p.RepetitionType) != (*other.RepetitionType) { return false } - } - if p.Name != other.Name { return false } - if p.NumChildren != other.NumChildren { - if p.NumChildren == nil || other.NumChildren == nil { - return false - } - if (*p.NumChildren) != (*other.NumChildren) { return false } - } - if p.ConvertedType != other.ConvertedType { - if p.ConvertedType == nil || other.ConvertedType == nil { - return false - } - if (*p.ConvertedType) != (*other.ConvertedType) { return false } - } - if p.Scale != other.Scale { - if p.Scale == nil || other.Scale == nil { - return false - } - if (*p.Scale) != (*other.Scale) { return false } - } - if p.Precision != other.Precision { - if p.Precision == nil || other.Precision == nil { - return false - } - if (*p.Precision) != (*other.Precision) { return false } - } - if p.FieldID != other.FieldID { - if p.FieldID == nil || other.FieldID == nil { - return false - } - if (*p.FieldID) != (*other.FieldID) { return false } - } - if !p.LogicalType.Equals(other.LogicalType) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Type != other.Type { + if p.Type == nil || other.Type == nil { + return false + } + if (*p.Type) != (*other.Type) { + return false + } + } + if p.TypeLength != other.TypeLength { + if p.TypeLength == nil || other.TypeLength == nil { + return false + } + if (*p.TypeLength) != (*other.TypeLength) { + return false + } + } + if p.RepetitionType != other.RepetitionType { + if p.RepetitionType == nil || other.RepetitionType == nil { + return false + } + if (*p.RepetitionType) != (*other.RepetitionType) { + return false + } + } + if p.Name != other.Name { + return false + } + if p.NumChildren != other.NumChildren { + if p.NumChildren == nil || other.NumChildren == nil { + return false + } + if (*p.NumChildren) != (*other.NumChildren) { + return false + } + } + if p.ConvertedType != other.ConvertedType { + if p.ConvertedType == nil || other.ConvertedType == nil { + return false + } + if (*p.ConvertedType) != (*other.ConvertedType) { + return false + } + } + if p.Scale != other.Scale { + if p.Scale == nil || other.Scale == nil { + return false + } + if (*p.Scale) != (*other.Scale) { + return false + } + } + if p.Precision != other.Precision { + if p.Precision == nil || other.Precision == nil { + return false + } + if (*p.Precision) != (*other.Precision) { + return false + } + } + if p.FieldID != other.FieldID { + if p.FieldID == nil || other.FieldID == nil { + return false + } + if (*p.FieldID) != (*other.FieldID) { + return false + } + } + if !p.LogicalType.Equals(other.LogicalType) { + return false + } + return true } func (p *SchemaElement) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("SchemaElement(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("SchemaElement(%+v)", *p) } func (p *SchemaElement) Validate() error { - return nil + return nil } + // Data page header -// +// // Attributes: -// - NumValues: Number of values, including NULLs, in this data page. * -// - Encoding: Encoding used for this data page * -// - DefinitionLevelEncoding: Encoding used for definition levels * -// - RepetitionLevelEncoding: Encoding used for repetition levels * -// - Statistics: Optional statistics for the data in this page* +// - NumValues: Number of values, including NULLs, in this data page. 
*
+// - Encoding: Encoding used for this data page *
+// - DefinitionLevelEncoding: Encoding used for definition levels *
+// - RepetitionLevelEncoding: Encoding used for repetition levels *
+// - Statistics: Optional statistics for the data in this page *
type DataPageHeader struct {
- NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
- Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
- DefinitionLevelEncoding Encoding `thrift:"definition_level_encoding,3,required" db:"definition_level_encoding" json:"definition_level_encoding"`
- RepetitionLevelEncoding Encoding `thrift:"repetition_level_encoding,4,required" db:"repetition_level_encoding" json:"repetition_level_encoding"`
- Statistics *Statistics `thrift:"statistics,5" db:"statistics" json:"statistics,omitempty"`
+ NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
+ Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
+ DefinitionLevelEncoding Encoding `thrift:"definition_level_encoding,3,required" db:"definition_level_encoding" json:"definition_level_encoding"`
+ RepetitionLevelEncoding Encoding `thrift:"repetition_level_encoding,4,required" db:"repetition_level_encoding" json:"repetition_level_encoding"`
+ Statistics *Statistics `thrift:"statistics,5" db:"statistics" json:"statistics,omitempty"`
}

func NewDataPageHeader() *DataPageHeader {
- return &DataPageHeader{}
+ return &DataPageHeader{}
}
-
func (p *DataPageHeader) GetNumValues() int32 {
- return p.NumValues
+ return p.NumValues
}

func (p *DataPageHeader) GetEncoding() Encoding {
- return p.Encoding
+ return p.Encoding
}

func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding {
- return p.DefinitionLevelEncoding
+ return p.DefinitionLevelEncoding
}

func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding {
- return p.RepetitionLevelEncoding
+ return p.RepetitionLevelEncoding
}
+
var DataPageHeader_Statistics_DEFAULT *Statistics
+
func (p *DataPageHeader) GetStatistics() *Statistics {
- if !p.IsSetStatistics() {
- return DataPageHeader_Statistics_DEFAULT
- }
-return p.Statistics
+ if !p.IsSetStatistics() {
+ return DataPageHeader_Statistics_DEFAULT
+ }
+ return p.Statistics
}

func (p *DataPageHeader) IsSetStatistics() bool {
- return p.Statistics != nil
+ return p.Statistics != nil
}

func (p *DataPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error {
- if _, err := iprot.ReadStructBegin(ctx); err != nil {
- return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
- }
-
- var issetNumValues bool = false;
- var issetEncoding bool = false;
- var issetDefinitionLevelEncoding bool = false;
- var issetRepetitionLevelEncoding bool = false;
-
- for {
- _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
- if err != nil {
- return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
- }
- if fieldTypeId == thrift.STOP { break; }
- switch fieldId {
- case 1:
- if fieldTypeId == thrift.I32 {
- if err := p.ReadField1(ctx, iprot); err != nil {
- return err
- }
- issetNumValues = true
- } else {
- if err := iprot.Skip(ctx, fieldTypeId); err != nil {
- return err
- }
- }
- case 2:
- if fieldTypeId == thrift.I32 {
- if err := p.ReadField2(ctx, iprot); err != nil {
- return err
- }
- issetEncoding = true
- } else {
- if err := iprot.Skip(ctx, fieldTypeId); err != nil {
- return err
- }
- }
- case 3:
- if fieldTypeId == thrift.I32 {
- if err := p.ReadField3(ctx, iprot); err != nil {
- return err
- }
- issetDefinitionLevelEncoding = true
- } else {
- if err := iprot.Skip(ctx, fieldTypeId); err != nil {
- return err
- }
- }
- case 4:
- if fieldTypeId == thrift.I32 {
- if err := p.ReadField4(ctx, iprot); err != nil {
- return err
- }
- issetRepetitionLevelEncoding = true
- } else {
- if err := iprot.Skip(ctx, fieldTypeId); err != nil {
- return err
- }
- }
- case 5:
- if fieldTypeId == thrift.STRUCT {
- if err := p.ReadField5(ctx, iprot); err != nil {
- return err
- }
- } else {
- if err := iprot.Skip(ctx, fieldTypeId); err != nil {
- return err
- }
- }
- default:
- if err := iprot.Skip(ctx, fieldTypeId); err != nil {
- return err
- }
- }
- if err := iprot.ReadFieldEnd(ctx); err != nil {
- return err
- }
- }
- if err := iprot.ReadStructEnd(ctx); err != nil {
- return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
- }
- if !issetNumValues{
- return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set"));
- }
- if !issetEncoding{
- return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set"));
- }
- if !issetDefinitionLevelEncoding{
- return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelEncoding is not set"));
- }
- if !issetRepetitionLevelEncoding{
- return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelEncoding is not set"));
- }
- return nil
-}
-
-func (p *DataPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
- if v, err := iprot.ReadI32(ctx); err != nil {
- return thrift.PrependError("error reading field 1: ", err)
-} else {
- p.NumValues = v
-}
- return nil
-}
-
-func (p *DataPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
- if v, err := iprot.ReadI32(ctx); err != nil {
- return thrift.PrependError("error reading field 2: ", err)
-} else {
- temp := Encoding(v)
- p.Encoding = temp
-}
- return nil
-}
-
-func (p *DataPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
- if v, err := iprot.ReadI32(ctx); err != nil {
- return thrift.PrependError("error reading field 3: ", err)
-} else {
- temp := Encoding(v)
- p.DefinitionLevelEncoding = temp
-}
- return nil
-}
-
-func (p *DataPageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
- if v, err := iprot.ReadI32(ctx); err != nil {
- return thrift.PrependError("error reading field 4: ", err)
-} else {
- temp := Encoding(v)
- p.RepetitionLevelEncoding = temp
-}
- return nil
-}
-
-func (p *DataPageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
- p.Statistics = &Statistics{}
- if err := p.Statistics.Read(ctx, iprot); err != nil {
- return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err)
- }
- return nil
+ if _, err := iprot.ReadStructBegin(ctx); err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+ }
+
+ var issetNumValues bool = false
+ var issetEncoding bool = false
+ var issetDefinitionLevelEncoding bool = false
+ var issetRepetitionLevelEncoding bool = false
+
+ for {
+ _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+ if err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+ }
+ if fieldTypeId == thrift.STOP {
+ break
+ }
+ switch fieldId {
+ case 1:
+ if fieldTypeId == thrift.I32 {
+ if err := p.ReadField1(ctx, iprot); err != nil {
+ return err
+ }
+ issetNumValues = true
+ } else {
+ if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+ return err
+ }
+ }
+ case 2:
+ if fieldTypeId == thrift.I32 {
+ if err := p.ReadField2(ctx, iprot); err != nil {
+ return err
+ }
+ issetEncoding = true
+ } else {
+ if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+ return err
+ }
+ }
+ case 3:
+ if fieldTypeId == thrift.I32 {
+ if err := p.ReadField3(ctx, iprot); err != nil {
+ return err
+ }
+ issetDefinitionLevelEncoding = true
+ } else {
+ if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+ return err
+ }
+ }
+ case 4:
+ if fieldTypeId == thrift.I32 {
+ if err := p.ReadField4(ctx, iprot); err != nil {
+ return err
+ }
+ issetRepetitionLevelEncoding = true
+ } else {
+ if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+ return err
+ }
+ }
+ case 5:
+ if fieldTypeId == thrift.STRUCT {
+ if err := p.ReadField5(ctx, iprot); err != nil {
+ return err
+ }
+ } else {
+ if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+ return err
+ }
+ }
+ default:
+ if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+ return err
+ }
+ }
+ if err := iprot.ReadFieldEnd(ctx); err != nil {
+ return err
+ }
+ }
+ if err := iprot.ReadStructEnd(ctx); err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+ }
+ if !issetNumValues {
+ return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set"))
+ }
+ if !issetEncoding {
+ return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set"))
+ }
+ if !issetDefinitionLevelEncoding {
+ return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelEncoding is not set"))
+ }
+ if !issetRepetitionLevelEncoding {
+ return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelEncoding is not set"))
+ }
+ return nil
+}
+
+func (p *DataPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+ if v, err := iprot.ReadI32(ctx); err != nil {
+ return thrift.PrependError("error reading field 1: ", err)
+ } else {
+ p.NumValues = v
+ }
+ return nil
+}
+
+func (p *DataPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+ if v, err := iprot.ReadI32(ctx); err != nil {
+ return thrift.PrependError("error reading field 2: ", err)
+ } else {
+ temp := Encoding(v)
+ p.Encoding = temp
+ }
+ return nil
+}
+
+func (p *DataPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+ if v, err := iprot.ReadI32(ctx); err != nil {
+ return thrift.PrependError("error reading field 3: ", err)
+ } else {
+ temp := Encoding(v)
+ p.DefinitionLevelEncoding = temp
+ }
+ return nil
+}
+
+func (p *DataPageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+ if v, err := iprot.ReadI32(ctx); err != nil {
+ return thrift.PrependError("error reading field 4: ", err)
+ } else {
+ temp := Encoding(v)
+ p.RepetitionLevelEncoding = temp
+ }
+ return nil
+}
+
+func (p *DataPageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+ p.Statistics = &Statistics{}
+ if err := p.Statistics.Read(ctx, iprot); err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err)
+ }
+ return nil
}

func (p *DataPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error {
- if err := oprot.WriteStructBegin(ctx, "DataPageHeader"); err != nil {
- return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
- if p != nil {
- if err := p.writeField1(ctx, oprot); err != nil { return err }
- if err := p.writeField2(ctx, oprot); err != nil { return err }
- if err := p.writeField3(ctx, oprot); err != nil { return err }
- if err := p.writeField4(ctx, oprot); err != nil { return err }
- if err := p.writeField5(ctx, oprot); err != nil { return err }
- }
- if err := oprot.WriteFieldStop(ctx); err != nil {
- return thrift.PrependError("write field stop error: ", err) }
- if err := oprot.WriteStructEnd(ctx); err != nil {
- return thrift.PrependError("write struct stop error: ", err) }
- return nil
+ if err := oprot.WriteStructBegin(ctx, "DataPageHeader"); err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+ }
+ if p != nil {
+ if err := p.writeField1(ctx, oprot); err != nil {
+ return err
+ }
+ if err := p.writeField2(ctx, oprot); err != nil {
+ return err
+ }
+ if err := p.writeField3(ctx, oprot); err != nil {
+ return err
+ }
+ if err := p.writeField4(ctx, oprot); err != nil {
+ return err
+ }
+ if err := p.writeField5(ctx, oprot); err != nil {
+ return err
+ }
+ }
+ if err := oprot.WriteFieldStop(ctx); err != nil {
+ return thrift.PrependError("write field stop error: ", err)
+ }
+ if err := oprot.WriteStructEnd(ctx); err != nil {
+ return thrift.PrependError("write struct stop error: ", err)
+ }
+ return nil
}

func (p *DataPageHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
- if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil {
- return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) }
- if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil {
- return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) }
- if err := oprot.WriteFieldEnd(ctx); err != nil {
- return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) }
- return err
+ if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err)
+ }
+ if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err)
+ }
+ if err := oprot.WriteFieldEnd(ctx); err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err)
+ }
+ return err
}

func (p *DataPageHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
- if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil {
- return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) }
- if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil {
- return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) }
- if err := oprot.WriteFieldEnd(ctx); err != nil {
- return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) }
- return err
+ if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err)
+ }
+ if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err)
+ }
+ if err := oprot.WriteFieldEnd(ctx); err != nil {
+ return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err)
+ }
+ return err
}
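
// Example (editorial sketch, not part of this patch or of the generated
// file): round-tripping a DataPageHeader through Thrift's compact protocol,
// the framing Parquet uses for page headers. The generated-package import
// path and the Encoding_PLAIN / Encoding_RLE constant names are assumptions
// following the Thrift Go generator's usual conventions; TMemoryBuffer and
// NewTCompactProtocolConf come from github.com/apache/thrift/lib/go/thrift.

package main

import (
    "context"
    "fmt"
    "log"

    "github.com/apache/thrift/lib/go/thrift"

    // Hypothetical import path; in the Arrow tree this package lives under
    // go/parquet/internal/gen-go/parquet and is internal to the module.
    parquet "example.com/gen-go/parquet"
)

func main() {
    ctx := context.Background()

    // Populate the four required fields that Read enforces via its isset checks.
    in := parquet.NewDataPageHeader()
    in.NumValues = 128
    in.Encoding = parquet.Encoding_PLAIN              // assumed enum constant name
    in.DefinitionLevelEncoding = parquet.Encoding_RLE // assumed enum constant name
    in.RepetitionLevelEncoding = parquet.Encoding_RLE
    // Statistics (field 5) is optional and stays nil here.

    // Serialize into an in-memory transport standing in for a file.
    buf := thrift.NewTMemoryBuffer()
    if err := in.Write(ctx, thrift.NewTCompactProtocolConf(buf, nil)); err != nil {
        log.Fatal(err)
    }

    // Deserialize: unknown fields are skipped, missing required fields error out.
    out := parquet.NewDataPageHeader()
    if err := out.Read(ctx, thrift.NewTCompactProtocolConf(buf, nil)); err != nil {
        log.Fatal(err)
    }

    fmt.Println(in.Equals(out)) // true
}
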
func (p *DataPageHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "definition_level_encoding", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:definition_level_encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.DefinitionLevelEncoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.definition_level_encoding (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:definition_level_encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "definition_level_encoding", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:definition_level_encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.DefinitionLevelEncoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.definition_level_encoding (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:definition_level_encoding: ", p), err) + } + return err } func (p *DataPageHeader) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "repetition_level_encoding", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:repetition_level_encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.RepetitionLevelEncoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.repetition_level_encoding (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:repetition_level_encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "repetition_level_encoding", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:repetition_level_encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.RepetitionLevelEncoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.repetition_level_encoding (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:repetition_level_encoding: ", p), err) + } + return err } func (p *DataPageHeader) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetStatistics() { - if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:statistics: ", p), err) } - if err := p.Statistics.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:statistics: ", p), err) } - } - return err + if p.IsSetStatistics() { + if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:statistics: ", p), err) + } + if err := p.Statistics.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T write field end error 5:statistics: ", p), err) + } + } + return err } func (p *DataPageHeader) Equals(other *DataPageHeader) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.NumValues != other.NumValues { return false } - if p.Encoding != other.Encoding { return false } - if p.DefinitionLevelEncoding != other.DefinitionLevelEncoding { return false } - if p.RepetitionLevelEncoding != other.RepetitionLevelEncoding { return false } - if !p.Statistics.Equals(other.Statistics) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.NumValues != other.NumValues { + return false + } + if p.Encoding != other.Encoding { + return false + } + if p.DefinitionLevelEncoding != other.DefinitionLevelEncoding { + return false + } + if p.RepetitionLevelEncoding != other.RepetitionLevelEncoding { + return false + } + if !p.Statistics.Equals(other.Statistics) { + return false + } + return true } func (p *DataPageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DataPageHeader(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("DataPageHeader(%+v)", *p) } func (p *DataPageHeader) Validate() error { - return nil + return nil } + type IndexPageHeader struct { } func NewIndexPageHeader() *IndexPageHeader { - return &IndexPageHeader{} + return &IndexPageHeader{} } func (p *IndexPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *IndexPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "IndexPageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "IndexPageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := 
oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *IndexPageHeader) Equals(other *IndexPageHeader) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *IndexPageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("IndexPageHeader(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("IndexPageHeader(%+v)", *p) } func (p *IndexPageHeader) Validate() error { - return nil + return nil } + // The dictionary page must be placed at the first position of the column chunk // if it is partly or completely dictionary encoded. At most one dictionary page // can be placed in a column chunk. -// -// +// // Attributes: -// - NumValues: Number of values in the dictionary * -// - Encoding: Encoding using this dictionary page * -// - IsSorted: If true, the entries in the dictionary are sorted in ascending order * +// - NumValues: Number of values in the dictionary * +// - Encoding: Encoding using this dictionary page * +// - IsSorted: If true, the entries in the dictionary are sorted in ascending order * type DictionaryPageHeader struct { - NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` - Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` - IsSorted *bool `thrift:"is_sorted,3" db:"is_sorted" json:"is_sorted,omitempty"` + NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` + Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` + IsSorted *bool `thrift:"is_sorted,3" db:"is_sorted" json:"is_sorted,omitempty"` } func NewDictionaryPageHeader() *DictionaryPageHeader { - return &DictionaryPageHeader{} + return &DictionaryPageHeader{} } - func (p *DictionaryPageHeader) GetNumValues() int32 { - return p.NumValues + return p.NumValues } func (p *DictionaryPageHeader) GetEncoding() Encoding { - return p.Encoding + return p.Encoding } + var DictionaryPageHeader_IsSorted_DEFAULT bool + func (p *DictionaryPageHeader) GetIsSorted() bool { - if !p.IsSetIsSorted() { - return DictionaryPageHeader_IsSorted_DEFAULT - } -return *p.IsSorted + if !p.IsSetIsSorted() { + return DictionaryPageHeader_IsSorted_DEFAULT + } + return *p.IsSorted } func (p *DictionaryPageHeader) IsSetIsSorted() bool { - return p.IsSorted != nil + return p.IsSorted != nil } func (p *DictionaryPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNumValues bool = false; - var issetEncoding bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetNumValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetEncoding 
= true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNumValues{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")); - } - if !issetEncoding{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")); - } - return nil -} - -func (p *DictionaryPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.NumValues = v -} - return nil -} - -func (p *DictionaryPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - temp := Encoding(v) - p.Encoding = temp -} - return nil -} - -func (p *DictionaryPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.IsSorted = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNumValues bool = false + var issetEncoding bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetNumValues = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetEncoding = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNumValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) + } + if !issetEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + return nil +} + +func (p *DictionaryPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: 
", err) + } else { + p.NumValues = v + } + return nil +} + +func (p *DictionaryPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *DictionaryPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.IsSorted = &v + } + return nil } func (p *DictionaryPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "DictionaryPageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "DictionaryPageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *DictionaryPageHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) + } + return err } func (p *DictionaryPageHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) + } + return err } func (p *DictionaryPageHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetIsSorted() { - if err := oprot.WriteFieldBegin(ctx, "is_sorted", thrift.BOOL, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:is_sorted: ", p), err) } - if err := oprot.WriteBool(ctx, bool(*p.IsSorted)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.is_sorted (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:is_sorted: ", p), err) } - } - return err + if p.IsSetIsSorted() { + if err := oprot.WriteFieldBegin(ctx, "is_sorted", thrift.BOOL, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:is_sorted: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(*p.IsSorted)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.is_sorted (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:is_sorted: ", p), err) + } + } + return err } func (p *DictionaryPageHeader) Equals(other *DictionaryPageHeader) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.NumValues != other.NumValues { return false } - if p.Encoding != other.Encoding { return false } - if p.IsSorted != other.IsSorted { - if p.IsSorted == nil || other.IsSorted == nil { - return false - } - if (*p.IsSorted) != (*other.IsSorted) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.NumValues != other.NumValues { + return false + } + if p.Encoding != other.Encoding { + return false + } + if p.IsSorted != other.IsSorted { + if p.IsSorted == nil || other.IsSorted == nil { + return false + } + if (*p.IsSorted) != (*other.IsSorted) { + return false + } + } + return true } func (p *DictionaryPageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DictionaryPageHeader(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("DictionaryPageHeader(%+v)", *p) } func (p *DictionaryPageHeader) Validate() error { - return nil + return nil } + // New page format allowing reading levels without decompressing the data // Repetition and definition levels are uncompressed // The remaining section containing the data is compressed if is_compressed is true -// -// +// // Attributes: -// - NumValues: Number of values, including NULLs, in this data page. * -// - NumNulls: Number of NULL values, in this data page. +// - NumValues: Number of values, including NULLs, in this data page. * +// - NumNulls: Number of NULL values, in this data page. +// // Number of non-null = num_values - num_nulls which is also the number of values in the data section * -// - NumRows: Number of rows in this data page. 
which means pages change on record boundaries (r = 0) * -// - Encoding: Encoding used for data in this page * -// - DefinitionLevelsByteLength: length of the definition levels -// - RepetitionLevelsByteLength: length of the repetition levels -// - IsCompressed: whether the values are compressed. +// - NumRows: Number of rows in this data page. which means pages change on record boundaries (r = 0) * +// - Encoding: Encoding used for data in this page * +// - DefinitionLevelsByteLength: length of the definition levels +// - RepetitionLevelsByteLength: length of the repetition levels +// - IsCompressed: whether the values are compressed. +// // Which means the section of the page between // definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) // is compressed with the compression_codec. // If missing it is considered compressed -// - Statistics: optional statistics for the data in this page * +// - Statistics: optional statistics for the data in this page * type DataPageHeaderV2 struct { - NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` - NumNulls int32 `thrift:"num_nulls,2,required" db:"num_nulls" json:"num_nulls"` - NumRows int32 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` - Encoding Encoding `thrift:"encoding,4,required" db:"encoding" json:"encoding"` - DefinitionLevelsByteLength int32 `thrift:"definition_levels_byte_length,5,required" db:"definition_levels_byte_length" json:"definition_levels_byte_length"` - RepetitionLevelsByteLength int32 `thrift:"repetition_levels_byte_length,6,required" db:"repetition_levels_byte_length" json:"repetition_levels_byte_length"` - IsCompressed bool `thrift:"is_compressed,7" db:"is_compressed" json:"is_compressed"` - Statistics *Statistics `thrift:"statistics,8" db:"statistics" json:"statistics,omitempty"` + NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` + NumNulls int32 `thrift:"num_nulls,2,required" db:"num_nulls" json:"num_nulls"` + NumRows int32 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` + Encoding Encoding `thrift:"encoding,4,required" db:"encoding" json:"encoding"` + DefinitionLevelsByteLength int32 `thrift:"definition_levels_byte_length,5,required" db:"definition_levels_byte_length" json:"definition_levels_byte_length"` + RepetitionLevelsByteLength int32 `thrift:"repetition_levels_byte_length,6,required" db:"repetition_levels_byte_length" json:"repetition_levels_byte_length"` + IsCompressed bool `thrift:"is_compressed,7" db:"is_compressed" json:"is_compressed"` + Statistics *Statistics `thrift:"statistics,8" db:"statistics" json:"statistics,omitempty"` } func NewDataPageHeaderV2() *DataPageHeaderV2 { - return &DataPageHeaderV2{ -IsCompressed: true, -} + return &DataPageHeaderV2{ + IsCompressed: true, + } } - func (p *DataPageHeaderV2) GetNumValues() int32 { - return p.NumValues + return p.NumValues } func (p *DataPageHeaderV2) GetNumNulls() int32 { - return p.NumNulls + return p.NumNulls } func (p *DataPageHeaderV2) GetNumRows() int32 { - return p.NumRows + return p.NumRows } func (p *DataPageHeaderV2) GetEncoding() Encoding { - return p.Encoding + return p.Encoding } func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32 { - return p.DefinitionLevelsByteLength + return p.DefinitionLevelsByteLength } func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32 { - return p.RepetitionLevelsByteLength + return p.RepetitionLevelsByteLength } + var DataPageHeaderV2_IsCompressed_DEFAULT 
bool = true func (p *DataPageHeaderV2) GetIsCompressed() bool { - return p.IsCompressed + return p.IsCompressed } + var DataPageHeaderV2_Statistics_DEFAULT *Statistics + func (p *DataPageHeaderV2) GetStatistics() *Statistics { - if !p.IsSetStatistics() { - return DataPageHeaderV2_Statistics_DEFAULT - } -return p.Statistics + if !p.IsSetStatistics() { + return DataPageHeaderV2_Statistics_DEFAULT + } + return p.Statistics } func (p *DataPageHeaderV2) IsSetIsCompressed() bool { - return p.IsCompressed != DataPageHeaderV2_IsCompressed_DEFAULT + return p.IsCompressed != DataPageHeaderV2_IsCompressed_DEFAULT } func (p *DataPageHeaderV2) IsSetStatistics() bool { - return p.Statistics != nil + return p.Statistics != nil } func (p *DataPageHeaderV2) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNumValues bool = false; - var issetNumNulls bool = false; - var issetNumRows bool = false; - var issetEncoding bool = false; - var issetDefinitionLevelsByteLength bool = false; - var issetRepetitionLevelsByteLength bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetNumValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetNumNulls = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I32 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetNumRows = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I32 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetEncoding = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.I32 { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - issetDefinitionLevelsByteLength = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.I32 { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - issetRepetitionLevelsByteLength = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 8: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNumValues{ - return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")); - } - if !issetNumNulls{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumNulls is not set")); - } - if !issetNumRows{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")); - } - if !issetEncoding{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")); - } - if !issetDefinitionLevelsByteLength{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelsByteLength is not set")); - } - if !issetRepetitionLevelsByteLength{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelsByteLength is not set")); - } - return nil -} - -func (p *DataPageHeaderV2) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.NumValues = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.NumNulls = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.NumRows = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - temp := Encoding(v) - p.Encoding = temp -} - return nil -} - -func (p *DataPageHeaderV2) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 5: ", err) -} else { - p.DefinitionLevelsByteLength = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - p.RepetitionLevelsByteLength = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 7: ", err) -} else { - p.IsCompressed = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - p.Statistics = &Statistics{} - if err := p.Statistics.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNumValues bool = false + var issetNumNulls bool = false + var issetNumRows bool = false + var issetEncoding bool = false + var issetDefinitionLevelsByteLength bool = false + var issetRepetitionLevelsByteLength bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + 
} + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetNumValues = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetNumNulls = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I32 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetNumRows = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I32 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetEncoding = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.I32 { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + issetDefinitionLevelsByteLength = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.I32 { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + issetRepetitionLevelsByteLength = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNumValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) + } + if !issetNumNulls { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumNulls is not set")) + } + if !issetNumRows { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) + } + if !issetEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + if !issetDefinitionLevelsByteLength { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelsByteLength is not set")) + } + if !issetRepetitionLevelsByteLength { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelsByteLength is not set")) + } + return nil +} + +func (p *DataPageHeaderV2) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.NumValues = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.NumNulls = v + } 
+ return nil +} + +func (p *DataPageHeaderV2) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NumRows = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *DataPageHeaderV2) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.DefinitionLevelsByteLength = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.RepetitionLevelsByteLength = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 7: ", err) + } else { + p.IsCompressed = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + p.Statistics = &Statistics{} + if err := p.Statistics.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) + } + return nil } func (p *DataPageHeaderV2) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "DataPageHeaderV2"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "DataPageHeaderV2"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != 
nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *DataPageHeaderV2) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_nulls", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:num_nulls: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumNulls)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_nulls (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:num_nulls: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_nulls", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:num_nulls: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumNulls)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_nulls (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:num_nulls: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumRows)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumRows)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 4); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 4:encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.encoding (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:encoding: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "definition_levels_byte_length", thrift.I32, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:definition_levels_byte_length: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.DefinitionLevelsByteLength)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.definition_levels_byte_length (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:definition_levels_byte_length: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "definition_levels_byte_length", thrift.I32, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:definition_levels_byte_length: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.DefinitionLevelsByteLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.definition_levels_byte_length (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:definition_levels_byte_length: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "repetition_levels_byte_length", thrift.I32, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:repetition_levels_byte_length: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.RepetitionLevelsByteLength)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.repetition_levels_byte_length (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:repetition_levels_byte_length: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "repetition_levels_byte_length", thrift.I32, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:repetition_levels_byte_length: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.RepetitionLevelsByteLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.repetition_levels_byte_length (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:repetition_levels_byte_length: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField7(ctx context.Context, oprot thrift.TProtocol) 
(err error) { - if p.IsSetIsCompressed() { - if err := oprot.WriteFieldBegin(ctx, "is_compressed", thrift.BOOL, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:is_compressed: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.IsCompressed)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.is_compressed (7) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:is_compressed: ", p), err) } - } - return err + if p.IsSetIsCompressed() { + if err := oprot.WriteFieldBegin(ctx, "is_compressed", thrift.BOOL, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:is_compressed: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(p.IsCompressed)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.is_compressed (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:is_compressed: ", p), err) + } + } + return err } func (p *DataPageHeaderV2) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetStatistics() { - if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:statistics: ", p), err) } - if err := p.Statistics.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:statistics: ", p), err) } - } - return err + if p.IsSetStatistics() { + if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:statistics: ", p), err) + } + if err := p.Statistics.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:statistics: ", p), err) + } + } + return err } func (p *DataPageHeaderV2) Equals(other *DataPageHeaderV2) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.NumValues != other.NumValues { return false } - if p.NumNulls != other.NumNulls { return false } - if p.NumRows != other.NumRows { return false } - if p.Encoding != other.Encoding { return false } - if p.DefinitionLevelsByteLength != other.DefinitionLevelsByteLength { return false } - if p.RepetitionLevelsByteLength != other.RepetitionLevelsByteLength { return false } - if p.IsCompressed != other.IsCompressed { return false } - if !p.Statistics.Equals(other.Statistics) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.NumValues != other.NumValues { + return false + } + if p.NumNulls != other.NumNulls { + return false + } + if p.NumRows != other.NumRows { + return false + } + if p.Encoding != other.Encoding { + return false + } + if p.DefinitionLevelsByteLength != other.DefinitionLevelsByteLength { + return false + } + if p.RepetitionLevelsByteLength != other.RepetitionLevelsByteLength { + return false + } + if p.IsCompressed != other.IsCompressed { + return false + } + if !p.Statistics.Equals(other.Statistics) { + return false + } + 
return true } func (p *DataPageHeaderV2) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DataPageHeaderV2(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("DataPageHeaderV2(%+v)", *p) } func (p *DataPageHeaderV2) Validate() error { - return nil + return nil } + // Block-based algorithm type annotation. * type SplitBlockAlgorithm struct { } func NewSplitBlockAlgorithm() *SplitBlockAlgorithm { - return &SplitBlockAlgorithm{} + return &SplitBlockAlgorithm{} } func (p *SplitBlockAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *SplitBlockAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "SplitBlockAlgorithm"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "SplitBlockAlgorithm"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *SplitBlockAlgorithm) Equals(other *SplitBlockAlgorithm) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *SplitBlockAlgorithm) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("SplitBlockAlgorithm(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("SplitBlockAlgorithm(%+v)", *p) } func (p *SplitBlockAlgorithm) Validate() error { - return nil + return nil } + // The algorithm used in Bloom filter. * -// +// // Attributes: -// - BLOCK: Block-based Bloom filter. 
* +// - BLOCK: Block-based Bloom filter. * type BloomFilterAlgorithm struct { - BLOCK *SplitBlockAlgorithm `thrift:"BLOCK,1" db:"BLOCK" json:"BLOCK,omitempty"` + BLOCK *SplitBlockAlgorithm `thrift:"BLOCK,1" db:"BLOCK" json:"BLOCK,omitempty"` } func NewBloomFilterAlgorithm() *BloomFilterAlgorithm { - return &BloomFilterAlgorithm{} + return &BloomFilterAlgorithm{} } var BloomFilterAlgorithm_BLOCK_DEFAULT *SplitBlockAlgorithm + func (p *BloomFilterAlgorithm) GetBLOCK() *SplitBlockAlgorithm { - if !p.IsSetBLOCK() { - return BloomFilterAlgorithm_BLOCK_DEFAULT - } -return p.BLOCK + if !p.IsSetBLOCK() { + return BloomFilterAlgorithm_BLOCK_DEFAULT + } + return p.BLOCK } func (p *BloomFilterAlgorithm) CountSetFieldsBloomFilterAlgorithm() int { - count := 0 - if (p.IsSetBLOCK()) { - count++ - } - return count + count := 0 + if p.IsSetBLOCK() { + count++ + } + return count } func (p *BloomFilterAlgorithm) IsSetBLOCK() bool { - return p.BLOCK != nil + return p.BLOCK != nil } func (p *BloomFilterAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *BloomFilterAlgorithm) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.BLOCK = &SplitBlockAlgorithm{} - if err := p.BLOCK.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BLOCK), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *BloomFilterAlgorithm) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.BLOCK = &SplitBlockAlgorithm{} + if err := p.BLOCK.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BLOCK), err) + } + return nil } func (p *BloomFilterAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsBloomFilterAlgorithm(); c != 1 { 
- return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "BloomFilterAlgorithm"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsBloomFilterAlgorithm(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "BloomFilterAlgorithm"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *BloomFilterAlgorithm) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetBLOCK() { - if err := oprot.WriteFieldBegin(ctx, "BLOCK", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:BLOCK: ", p), err) } - if err := p.BLOCK.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.BLOCK), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:BLOCK: ", p), err) } - } - return err + if p.IsSetBLOCK() { + if err := oprot.WriteFieldBegin(ctx, "BLOCK", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:BLOCK: ", p), err) + } + if err := p.BLOCK.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.BLOCK), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:BLOCK: ", p), err) + } + } + return err } func (p *BloomFilterAlgorithm) Equals(other *BloomFilterAlgorithm) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.BLOCK.Equals(other.BLOCK) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.BLOCK.Equals(other.BLOCK) { + return false + } + return true } func (p *BloomFilterAlgorithm) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("BloomFilterAlgorithm(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("BloomFilterAlgorithm(%+v)", *p) } func (p *BloomFilterAlgorithm) Validate() error { - return nil + return nil } + // Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash // algorithm. It uses 64 bits version of xxHash. 
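As the comment above says, this is the 64-bit variant of xxHash; the BloomFilterHash comment below notes it is taken over the plain-encoded column value (with seed 0, per the Parquet Bloom filter spec). A minimal sketch of producing such a hash in Go; the github.com/cespare/xxhash/v2 dependency is an assumption for illustration and is not used by this patch:

package main

import (
	"fmt"

	"github.com/cespare/xxhash/v2" // assumed third-party xxHash64 implementation
)

func main() {
	encoded := []byte("plain-encoded column value") // placeholder bytes
	// Sum64 computes xxHash64 with seed 0, matching the variant described above.
	fmt.Printf("%#x\n", xxhash.Sum64(encoded))
}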
-// type XxHash struct { } func NewXxHash() *XxHash { - return &XxHash{} + return &XxHash{} } func (p *XxHash) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *XxHash) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "XxHash"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "XxHash"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *XxHash) Equals(other *XxHash) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *XxHash) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("XxHash(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("XxHash(%+v)", *p) } func (p *XxHash) Validate() error { - return nil + return nil } + // The hash function used in Bloom filter. This function takes the hash of a column value // using plain encoding. -// -// +// // Attributes: -// - XXHASH: xxHash Strategy. * +// - XXHASH: xxHash Strategy. 
* type BloomFilterHash struct { - XXHASH *XxHash `thrift:"XXHASH,1" db:"XXHASH" json:"XXHASH,omitempty"` + XXHASH *XxHash `thrift:"XXHASH,1" db:"XXHASH" json:"XXHASH,omitempty"` } func NewBloomFilterHash() *BloomFilterHash { - return &BloomFilterHash{} + return &BloomFilterHash{} } var BloomFilterHash_XXHASH_DEFAULT *XxHash + func (p *BloomFilterHash) GetXXHASH() *XxHash { - if !p.IsSetXXHASH() { - return BloomFilterHash_XXHASH_DEFAULT - } -return p.XXHASH + if !p.IsSetXXHASH() { + return BloomFilterHash_XXHASH_DEFAULT + } + return p.XXHASH } func (p *BloomFilterHash) CountSetFieldsBloomFilterHash() int { - count := 0 - if (p.IsSetXXHASH()) { - count++ - } - return count + count := 0 + if p.IsSetXXHASH() { + count++ + } + return count } func (p *BloomFilterHash) IsSetXXHASH() bool { - return p.XXHASH != nil + return p.XXHASH != nil } func (p *BloomFilterHash) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *BloomFilterHash) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.XXHASH = &XxHash{} - if err := p.XXHASH.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.XXHASH), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *BloomFilterHash) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.XXHASH = &XxHash{} + if err := p.XXHASH.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.XXHASH), err) + } + return nil } func (p *BloomFilterHash) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsBloomFilterHash(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "BloomFilterHash"); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsBloomFilterHash(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "BloomFilterHash"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *BloomFilterHash) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetXXHASH() { - if err := oprot.WriteFieldBegin(ctx, "XXHASH", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:XXHASH: ", p), err) } - if err := p.XXHASH.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.XXHASH), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:XXHASH: ", p), err) } - } - return err + if p.IsSetXXHASH() { + if err := oprot.WriteFieldBegin(ctx, "XXHASH", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:XXHASH: ", p), err) + } + if err := p.XXHASH.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.XXHASH), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:XXHASH: ", p), err) + } + } + return err } func (p *BloomFilterHash) Equals(other *BloomFilterHash) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.XXHASH.Equals(other.XXHASH) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.XXHASH.Equals(other.XXHASH) { + return false + } + return true } func (p *BloomFilterHash) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("BloomFilterHash(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("BloomFilterHash(%+v)", *p) } func (p *BloomFilterHash) Validate() error { - return nil + return nil } + // The compression used in the Bloom filter. 
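Each of these Bloom filter annotations is a Thrift union: Write refuses to serialize unless exactly one member is set, as the CountSetFields check above shows, and the same pattern applies to the compression union that follows. A minimal sketch of building a valid value using only constructors from the generated code; the helper name is hypothetical and assumed to live in the same package:

// newXxHashAnnotation builds a BloomFilterHash with exactly one union member
// set, so CountSetFieldsBloomFilterHash() returns 1 and Write accepts it.
func newXxHashAnnotation() *BloomFilterHash {
	h := NewBloomFilterHash()
	h.XXHASH = NewXxHash()
	return h
}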
-// type Uncompressed struct { } func NewUncompressed() *Uncompressed { - return &Uncompressed{} + return &Uncompressed{} } func (p *Uncompressed) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *Uncompressed) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "Uncompressed"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "Uncompressed"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *Uncompressed) Equals(other *Uncompressed) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *Uncompressed) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("Uncompressed(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("Uncompressed(%+v)", *p) } func (p *Uncompressed) Validate() error { - return nil + return nil } + // Attributes: -// - UNCOMPRESSED +// - UNCOMPRESSED type BloomFilterCompression struct { - UNCOMPRESSED *Uncompressed `thrift:"UNCOMPRESSED,1" db:"UNCOMPRESSED" json:"UNCOMPRESSED,omitempty"` + UNCOMPRESSED *Uncompressed `thrift:"UNCOMPRESSED,1" db:"UNCOMPRESSED" json:"UNCOMPRESSED,omitempty"` } func NewBloomFilterCompression() *BloomFilterCompression { - return &BloomFilterCompression{} + return &BloomFilterCompression{} } var BloomFilterCompression_UNCOMPRESSED_DEFAULT *Uncompressed + func (p *BloomFilterCompression) GetUNCOMPRESSED() *Uncompressed { - if !p.IsSetUNCOMPRESSED() 
{ - return BloomFilterCompression_UNCOMPRESSED_DEFAULT - } -return p.UNCOMPRESSED + if !p.IsSetUNCOMPRESSED() { + return BloomFilterCompression_UNCOMPRESSED_DEFAULT + } + return p.UNCOMPRESSED } func (p *BloomFilterCompression) CountSetFieldsBloomFilterCompression() int { - count := 0 - if (p.IsSetUNCOMPRESSED()) { - count++ - } - return count + count := 0 + if p.IsSetUNCOMPRESSED() { + count++ + } + return count } func (p *BloomFilterCompression) IsSetUNCOMPRESSED() bool { - return p.UNCOMPRESSED != nil + return p.UNCOMPRESSED != nil } func (p *BloomFilterCompression) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *BloomFilterCompression) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.UNCOMPRESSED = &Uncompressed{} - if err := p.UNCOMPRESSED.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNCOMPRESSED), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *BloomFilterCompression) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.UNCOMPRESSED = &Uncompressed{} + if err := p.UNCOMPRESSED.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNCOMPRESSED), err) + } + return nil } func (p *BloomFilterCompression) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsBloomFilterCompression(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "BloomFilterCompression"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - 
return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsBloomFilterCompression(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "BloomFilterCompression"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *BloomFilterCompression) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetUNCOMPRESSED() { - if err := oprot.WriteFieldBegin(ctx, "UNCOMPRESSED", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:UNCOMPRESSED: ", p), err) } - if err := p.UNCOMPRESSED.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNCOMPRESSED), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:UNCOMPRESSED: ", p), err) } - } - return err + if p.IsSetUNCOMPRESSED() { + if err := oprot.WriteFieldBegin(ctx, "UNCOMPRESSED", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:UNCOMPRESSED: ", p), err) + } + if err := p.UNCOMPRESSED.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNCOMPRESSED), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:UNCOMPRESSED: ", p), err) + } + } + return err } func (p *BloomFilterCompression) Equals(other *BloomFilterCompression) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.UNCOMPRESSED.Equals(other.UNCOMPRESSED) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.UNCOMPRESSED.Equals(other.UNCOMPRESSED) { + return false + } + return true } func (p *BloomFilterCompression) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("BloomFilterCompression(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("BloomFilterCompression(%+v)", *p) } func (p *BloomFilterCompression) Validate() error { - return nil + return nil } + // Bloom filter header is stored at beginning of Bloom filter data of each column // and followed by its bitset. -// -// +// // Attributes: -// - NumBytes: The size of bitset in bytes * -// - Algorithm: The algorithm for setting bits. * -// - Hash: The hash function used for Bloom filter. * -// - Compression: The compression used in the Bloom filter * +// - NumBytes: The size of bitset in bytes * +// - Algorithm: The algorithm for setting bits. * +// - Hash: The hash function used for Bloom filter. 
* +// - Compression: The compression used in the Bloom filter * type BloomFilterHeader struct { - NumBytes int32 `thrift:"numBytes,1,required" db:"numBytes" json:"numBytes"` - Algorithm *BloomFilterAlgorithm `thrift:"algorithm,2,required" db:"algorithm" json:"algorithm"` - Hash *BloomFilterHash `thrift:"hash,3,required" db:"hash" json:"hash"` - Compression *BloomFilterCompression `thrift:"compression,4,required" db:"compression" json:"compression"` + NumBytes int32 `thrift:"numBytes,1,required" db:"numBytes" json:"numBytes"` + Algorithm *BloomFilterAlgorithm `thrift:"algorithm,2,required" db:"algorithm" json:"algorithm"` + Hash *BloomFilterHash `thrift:"hash,3,required" db:"hash" json:"hash"` + Compression *BloomFilterCompression `thrift:"compression,4,required" db:"compression" json:"compression"` } func NewBloomFilterHeader() *BloomFilterHeader { - return &BloomFilterHeader{} + return &BloomFilterHeader{} } - func (p *BloomFilterHeader) GetNumBytes() int32 { - return p.NumBytes + return p.NumBytes } + var BloomFilterHeader_Algorithm_DEFAULT *BloomFilterAlgorithm + func (p *BloomFilterHeader) GetAlgorithm() *BloomFilterAlgorithm { - if !p.IsSetAlgorithm() { - return BloomFilterHeader_Algorithm_DEFAULT - } -return p.Algorithm + if !p.IsSetAlgorithm() { + return BloomFilterHeader_Algorithm_DEFAULT + } + return p.Algorithm } + var BloomFilterHeader_Hash_DEFAULT *BloomFilterHash + func (p *BloomFilterHeader) GetHash() *BloomFilterHash { - if !p.IsSetHash() { - return BloomFilterHeader_Hash_DEFAULT - } -return p.Hash + if !p.IsSetHash() { + return BloomFilterHeader_Hash_DEFAULT + } + return p.Hash } + var BloomFilterHeader_Compression_DEFAULT *BloomFilterCompression + func (p *BloomFilterHeader) GetCompression() *BloomFilterCompression { - if !p.IsSetCompression() { - return BloomFilterHeader_Compression_DEFAULT - } -return p.Compression + if !p.IsSetCompression() { + return BloomFilterHeader_Compression_DEFAULT + } + return p.Compression } func (p *BloomFilterHeader) IsSetAlgorithm() bool { - return p.Algorithm != nil + return p.Algorithm != nil } func (p *BloomFilterHeader) IsSetHash() bool { - return p.Hash != nil + return p.Hash != nil } func (p *BloomFilterHeader) IsSetCompression() bool { - return p.Compression != nil + return p.Compression != nil } func (p *BloomFilterHeader) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNumBytes bool = false; - var issetAlgorithm bool = false; - var issetHash bool = false; - var issetCompression bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetNumBytes = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetAlgorithm = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetHash = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - 
case 4: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetCompression = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNumBytes{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumBytes is not set")); - } - if !issetAlgorithm{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Algorithm is not set")); - } - if !issetHash{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Hash is not set")); - } - if !issetCompression{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Compression is not set")); - } - return nil -} - -func (p *BloomFilterHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.NumBytes = v -} - return nil -} - -func (p *BloomFilterHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.Algorithm = &BloomFilterAlgorithm{} - if err := p.Algorithm.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Algorithm), err) - } - return nil -} - -func (p *BloomFilterHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - p.Hash = &BloomFilterHash{} - if err := p.Hash.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Hash), err) - } - return nil -} - -func (p *BloomFilterHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - p.Compression = &BloomFilterCompression{} - if err := p.Compression.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Compression), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNumBytes bool = false + var issetAlgorithm bool = false + var issetHash bool = false + var issetCompression bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetNumBytes = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetAlgorithm = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetHash = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetCompression = true + } 
else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNumBytes { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumBytes is not set")) + } + if !issetAlgorithm { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Algorithm is not set")) + } + if !issetHash { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Hash is not set")) + } + if !issetCompression { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Compression is not set")) + } + return nil +} + +func (p *BloomFilterHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.NumBytes = v + } + return nil +} + +func (p *BloomFilterHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.Algorithm = &BloomFilterAlgorithm{} + if err := p.Algorithm.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Algorithm), err) + } + return nil +} + +func (p *BloomFilterHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + p.Hash = &BloomFilterHash{} + if err := p.Hash.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Hash), err) + } + return nil +} + +func (p *BloomFilterHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + p.Compression = &BloomFilterCompression{} + if err := p.Compression.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Compression), err) + } + return nil } func (p *BloomFilterHeader) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "BloomFilterHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "BloomFilterHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop 
error: ", err) + } + return nil } func (p *BloomFilterHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "numBytes", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:numBytes: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumBytes)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.numBytes (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:numBytes: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "numBytes", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:numBytes: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumBytes)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.numBytes (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:numBytes: ", p), err) + } + return err } func (p *BloomFilterHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "algorithm", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:algorithm: ", p), err) } - if err := p.Algorithm.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Algorithm), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:algorithm: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "algorithm", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:algorithm: ", p), err) + } + if err := p.Algorithm.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Algorithm), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:algorithm: ", p), err) + } + return err } func (p *BloomFilterHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "hash", thrift.STRUCT, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:hash: ", p), err) } - if err := p.Hash.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Hash), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:hash: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "hash", thrift.STRUCT, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:hash: ", p), err) + } + if err := p.Hash.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Hash), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:hash: ", p), err) + } + return err } func (p *BloomFilterHeader) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "compression", thrift.STRUCT, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:compression: ", p), err) } - if err := p.Compression.Write(ctx, oprot); 
err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Compression), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:compression: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "compression", thrift.STRUCT, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:compression: ", p), err) + } + if err := p.Compression.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Compression), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:compression: ", p), err) + } + return err } func (p *BloomFilterHeader) Equals(other *BloomFilterHeader) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.NumBytes != other.NumBytes { return false } - if !p.Algorithm.Equals(other.Algorithm) { return false } - if !p.Hash.Equals(other.Hash) { return false } - if !p.Compression.Equals(other.Compression) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.NumBytes != other.NumBytes { + return false + } + if !p.Algorithm.Equals(other.Algorithm) { + return false + } + if !p.Hash.Equals(other.Hash) { + return false + } + if !p.Compression.Equals(other.Compression) { + return false + } + return true } func (p *BloomFilterHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("BloomFilterHeader(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("BloomFilterHeader(%+v)", *p) } func (p *BloomFilterHeader) Validate() error { - return nil + return nil } + // Attributes: -// - Type: the type of the page: indicates which of the *_header fields is set * -// - UncompressedPageSize: Uncompressed page size in bytes (not including this header) * -// - CompressedPageSize: Compressed (and potentially encrypted) page size in bytes, not including this header * -// - Crc: The 32-bit CRC checksum for the page, to be be calculated as follows: -// -// - The standard CRC32 algorithm is used (with polynomial 0x04C11DB7, -// the same as in e.g. GZip). -// - All page types can have a CRC (v1 and v2 data pages, dictionary pages, -// etc.). -// - The CRC is computed on the serialization binary representation of the page -// (as written to disk), excluding the page header. For example, for v1 -// data pages, the CRC is computed on the concatenation of repetition levels, -// definition levels and column values (optionally compressed, optionally -// encrypted). -// - The CRC computation therefore takes place after any compression -// and encryption steps, if any. -// +// +// - Type: the type of the page: indicates which of the *_header fields is set * +// +// - UncompressedPageSize: Uncompressed page size in bytes (not including this header) * +// +// - CompressedPageSize: Compressed (and potentially encrypted) page size in bytes, not including this header * +// +// - Crc: The 32-bit CRC checksum for the page, to be calculated as follows: +// +// - The standard CRC32 algorithm is used (with polynomial 0x04C11DB7, +// the same as in e.g. GZip). +// +// - All page types can have a CRC (v1 and v2 data pages, dictionary pages, +// etc.). +// +// - The CRC is computed on the serialization binary representation of the page +// (as written to disk), excluding the page header. 
For example, for v1 +// data pages, the CRC is computed on the concatenation of repetition levels, +// definition levels and column values (optionally compressed, optionally +// encrypted). +// +// - The CRC computation therefore takes place after any compression +// and encryption steps, if any. +// // If enabled, this allows for disabling checksumming in HDFS if only a few // pages need to be read. -// - DataPageHeader -// - IndexPageHeader -// - DictionaryPageHeader -// - DataPageHeaderV2 +// - DataPageHeader +// - IndexPageHeader +// - DictionaryPageHeader +// - DataPageHeaderV2 type PageHeader struct { - Type PageType `thrift:"type,1,required" db:"type" json:"type"` - UncompressedPageSize int32 `thrift:"uncompressed_page_size,2,required" db:"uncompressed_page_size" json:"uncompressed_page_size"` - CompressedPageSize int32 `thrift:"compressed_page_size,3,required" db:"compressed_page_size" json:"compressed_page_size"` - Crc *int32 `thrift:"crc,4" db:"crc" json:"crc,omitempty"` - DataPageHeader *DataPageHeader `thrift:"data_page_header,5" db:"data_page_header" json:"data_page_header,omitempty"` - IndexPageHeader *IndexPageHeader `thrift:"index_page_header,6" db:"index_page_header" json:"index_page_header,omitempty"` - DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" db:"dictionary_page_header" json:"dictionary_page_header,omitempty"` - DataPageHeaderV2 *DataPageHeaderV2 `thrift:"data_page_header_v2,8" db:"data_page_header_v2" json:"data_page_header_v2,omitempty"` + Type PageType `thrift:"type,1,required" db:"type" json:"type"` + UncompressedPageSize int32 `thrift:"uncompressed_page_size,2,required" db:"uncompressed_page_size" json:"uncompressed_page_size"` + CompressedPageSize int32 `thrift:"compressed_page_size,3,required" db:"compressed_page_size" json:"compressed_page_size"` + Crc *int32 `thrift:"crc,4" db:"crc" json:"crc,omitempty"` + DataPageHeader *DataPageHeader `thrift:"data_page_header,5" db:"data_page_header" json:"data_page_header,omitempty"` + IndexPageHeader *IndexPageHeader `thrift:"index_page_header,6" db:"index_page_header" json:"index_page_header,omitempty"` + DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" db:"dictionary_page_header" json:"dictionary_page_header,omitempty"` + DataPageHeaderV2 *DataPageHeaderV2 `thrift:"data_page_header_v2,8" db:"data_page_header_v2" json:"data_page_header_v2,omitempty"` } func NewPageHeader() *PageHeader { - return &PageHeader{} + return &PageHeader{} } - func (p *PageHeader) GetType() PageType { - return p.Type + return p.Type } func (p *PageHeader) GetUncompressedPageSize() int32 { - return p.UncompressedPageSize + return p.UncompressedPageSize } func (p *PageHeader) GetCompressedPageSize() int32 { - return p.CompressedPageSize + return p.CompressedPageSize } + var PageHeader_Crc_DEFAULT int32 + func (p *PageHeader) GetCrc() int32 { - if !p.IsSetCrc() { - return PageHeader_Crc_DEFAULT - } -return *p.Crc + if !p.IsSetCrc() { + return PageHeader_Crc_DEFAULT + } + return *p.Crc } + var PageHeader_DataPageHeader_DEFAULT *DataPageHeader + func (p *PageHeader) GetDataPageHeader() *DataPageHeader { - if !p.IsSetDataPageHeader() { - return PageHeader_DataPageHeader_DEFAULT - } -return p.DataPageHeader + if !p.IsSetDataPageHeader() { + return PageHeader_DataPageHeader_DEFAULT + } + return p.DataPageHeader } + var PageHeader_IndexPageHeader_DEFAULT *IndexPageHeader + func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader { - if !p.IsSetIndexPageHeader() { - return 
PageHeader_IndexPageHeader_DEFAULT - } -return p.IndexPageHeader + if !p.IsSetIndexPageHeader() { + return PageHeader_IndexPageHeader_DEFAULT + } + return p.IndexPageHeader } + var PageHeader_DictionaryPageHeader_DEFAULT *DictionaryPageHeader + func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader { - if !p.IsSetDictionaryPageHeader() { - return PageHeader_DictionaryPageHeader_DEFAULT - } -return p.DictionaryPageHeader + if !p.IsSetDictionaryPageHeader() { + return PageHeader_DictionaryPageHeader_DEFAULT + } + return p.DictionaryPageHeader } + var PageHeader_DataPageHeaderV2_DEFAULT *DataPageHeaderV2 + func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2 { - if !p.IsSetDataPageHeaderV2() { - return PageHeader_DataPageHeaderV2_DEFAULT - } -return p.DataPageHeaderV2 + if !p.IsSetDataPageHeaderV2() { + return PageHeader_DataPageHeaderV2_DEFAULT + } + return p.DataPageHeaderV2 } func (p *PageHeader) IsSetCrc() bool { - return p.Crc != nil + return p.Crc != nil } func (p *PageHeader) IsSetDataPageHeader() bool { - return p.DataPageHeader != nil + return p.DataPageHeader != nil } func (p *PageHeader) IsSetIndexPageHeader() bool { - return p.IndexPageHeader != nil + return p.IndexPageHeader != nil } func (p *PageHeader) IsSetDictionaryPageHeader() bool { - return p.DictionaryPageHeader != nil + return p.DictionaryPageHeader != nil } func (p *PageHeader) IsSetDataPageHeaderV2() bool { - return p.DataPageHeaderV2 != nil + return p.DataPageHeaderV2 != nil } func (p *PageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetType bool = false; - var issetUncompressedPageSize bool = false; - var issetCompressedPageSize bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetType = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetUncompressedPageSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I32 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetCompressedPageSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I32 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 
8: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetType{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")); - } - if !issetUncompressedPageSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field UncompressedPageSize is not set")); - } - if !issetCompressedPageSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")); - } - return nil -} - -func (p *PageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - temp := PageType(v) - p.Type = temp -} - return nil -} - -func (p *PageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.UncompressedPageSize = v -} - return nil -} - -func (p *PageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.CompressedPageSize = v -} - return nil -} - -func (p *PageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - p.Crc = &v -} - return nil -} - -func (p *PageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - p.DataPageHeader = &DataPageHeader{} - if err := p.DataPageHeader.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeader), err) - } - return nil -} - -func (p *PageHeader) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - p.IndexPageHeader = &IndexPageHeader{} - if err := p.IndexPageHeader.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.IndexPageHeader), err) - } - return nil -} - -func (p *PageHeader) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - p.DictionaryPageHeader = &DictionaryPageHeader{} - if err := p.DictionaryPageHeader.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DictionaryPageHeader), err) - } - return nil -} - -func (p *PageHeader) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - p.DataPageHeaderV2 = &DataPageHeaderV2{ - IsCompressed: true, -} - if err := p.DataPageHeaderV2.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeaderV2), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetType bool = false + var issetUncompressedPageSize bool = false + var issetCompressedPageSize bool = false + + for { + _, fieldTypeId, fieldId, err := 
iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetType = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetUncompressedPageSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I32 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetCompressedPageSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I32 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetType { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")) + } + if !issetUncompressedPageSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field UncompressedPageSize is not set")) + } + if !issetCompressedPageSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")) + } + return nil +} + +func (p *PageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := PageType(v) + p.Type = temp + } + return nil +} + +func (p *PageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.UncompressedPageSize = v + } + return nil +} + +func (p *PageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.CompressedPageSize = v + } + return nil +} + +func (p *PageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := 
iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + p.Crc = &v + } + return nil +} + +func (p *PageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + p.DataPageHeader = &DataPageHeader{} + if err := p.DataPageHeader.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeader), err) + } + return nil +} + +func (p *PageHeader) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + p.IndexPageHeader = &IndexPageHeader{} + if err := p.IndexPageHeader.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.IndexPageHeader), err) + } + return nil +} + +func (p *PageHeader) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + p.DictionaryPageHeader = &DictionaryPageHeader{} + if err := p.DictionaryPageHeader.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DictionaryPageHeader), err) + } + return nil +} + +func (p *PageHeader) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + p.DataPageHeaderV2 = &DataPageHeaderV2{ + IsCompressed: true, + } + if err := p.DataPageHeaderV2.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeaderV2), err) + } + return nil } func (p *PageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "PageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "PageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *PageHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); 
err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Type)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Type)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) + } + return err } func (p *PageHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "uncompressed_page_size", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:uncompressed_page_size: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.UncompressedPageSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.uncompressed_page_size (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:uncompressed_page_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "uncompressed_page_size", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:uncompressed_page_size: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.UncompressedPageSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.uncompressed_page_size (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:uncompressed_page_size: ", p), err) + } + return err } func (p *PageHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "compressed_page_size", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:compressed_page_size: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.CompressedPageSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:compressed_page_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "compressed_page_size", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:compressed_page_size: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.CompressedPageSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:compressed_page_size: ", p), err) + } + return err } func (p *PageHeader) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetCrc() { - if err := oprot.WriteFieldBegin(ctx, "crc", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:crc: ", p), 
err) } - if err := oprot.WriteI32(ctx, int32(*p.Crc)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.crc (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:crc: ", p), err) } - } - return err + if p.IsSetCrc() { + if err := oprot.WriteFieldBegin(ctx, "crc", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:crc: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.Crc)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.crc (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:crc: ", p), err) + } + } + return err } func (p *PageHeader) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDataPageHeader() { - if err := oprot.WriteFieldBegin(ctx, "data_page_header", thrift.STRUCT, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:data_page_header: ", p), err) } - if err := p.DataPageHeader.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeader), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:data_page_header: ", p), err) } - } - return err + if p.IsSetDataPageHeader() { + if err := oprot.WriteFieldBegin(ctx, "data_page_header", thrift.STRUCT, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:data_page_header: ", p), err) + } + if err := p.DataPageHeader.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeader), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:data_page_header: ", p), err) + } + } + return err } func (p *PageHeader) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetIndexPageHeader() { - if err := oprot.WriteFieldBegin(ctx, "index_page_header", thrift.STRUCT, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:index_page_header: ", p), err) } - if err := p.IndexPageHeader.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.IndexPageHeader), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:index_page_header: ", p), err) } - } - return err + if p.IsSetIndexPageHeader() { + if err := oprot.WriteFieldBegin(ctx, "index_page_header", thrift.STRUCT, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:index_page_header: ", p), err) + } + if err := p.IndexPageHeader.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.IndexPageHeader), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:index_page_header: ", p), err) + } + } + return err } func (p *PageHeader) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDictionaryPageHeader() { - if err := oprot.WriteFieldBegin(ctx, "dictionary_page_header", thrift.STRUCT, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:dictionary_page_header: ", p), 
err) } - if err := p.DictionaryPageHeader.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DictionaryPageHeader), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:dictionary_page_header: ", p), err) } - } - return err + if p.IsSetDictionaryPageHeader() { + if err := oprot.WriteFieldBegin(ctx, "dictionary_page_header", thrift.STRUCT, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:dictionary_page_header: ", p), err) + } + if err := p.DictionaryPageHeader.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DictionaryPageHeader), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:dictionary_page_header: ", p), err) + } + } + return err } func (p *PageHeader) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDataPageHeaderV2() { - if err := oprot.WriteFieldBegin(ctx, "data_page_header_v2", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:data_page_header_v2: ", p), err) } - if err := p.DataPageHeaderV2.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeaderV2), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:data_page_header_v2: ", p), err) } - } - return err + if p.IsSetDataPageHeaderV2() { + if err := oprot.WriteFieldBegin(ctx, "data_page_header_v2", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:data_page_header_v2: ", p), err) + } + if err := p.DataPageHeaderV2.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeaderV2), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:data_page_header_v2: ", p), err) + } + } + return err } func (p *PageHeader) Equals(other *PageHeader) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Type != other.Type { return false } - if p.UncompressedPageSize != other.UncompressedPageSize { return false } - if p.CompressedPageSize != other.CompressedPageSize { return false } - if p.Crc != other.Crc { - if p.Crc == nil || other.Crc == nil { - return false - } - if (*p.Crc) != (*other.Crc) { return false } - } - if !p.DataPageHeader.Equals(other.DataPageHeader) { return false } - if !p.IndexPageHeader.Equals(other.IndexPageHeader) { return false } - if !p.DictionaryPageHeader.Equals(other.DictionaryPageHeader) { return false } - if !p.DataPageHeaderV2.Equals(other.DataPageHeaderV2) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Type != other.Type { + return false + } + if p.UncompressedPageSize != other.UncompressedPageSize { + return false + } + if p.CompressedPageSize != other.CompressedPageSize { + return false + } + if p.Crc != other.Crc { + if p.Crc == nil || other.Crc == nil { + return false + } + if (*p.Crc) != (*other.Crc) { + return false + } + } + if !p.DataPageHeader.Equals(other.DataPageHeader) { + return false + } + if !p.IndexPageHeader.Equals(other.IndexPageHeader) { + return false + } + if 
!p.DictionaryPageHeader.Equals(other.DictionaryPageHeader) { + return false + } + if !p.DataPageHeaderV2.Equals(other.DataPageHeaderV2) { + return false + } + return true } func (p *PageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("PageHeader(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("PageHeader(%+v)", *p) } func (p *PageHeader) Validate() error { - return nil + return nil } + // Wrapper struct to store key values -// +// // Attributes: -// - Key -// - Value +// - Key +// - Value type KeyValue struct { - Key string `thrift:"key,1,required" db:"key" json:"key"` - Value *string `thrift:"value,2" db:"value" json:"value,omitempty"` + Key string `thrift:"key,1,required" db:"key" json:"key"` + Value *string `thrift:"value,2" db:"value" json:"value,omitempty"` } func NewKeyValue() *KeyValue { - return &KeyValue{} + return &KeyValue{} } - func (p *KeyValue) GetKey() string { - return p.Key + return p.Key } + var KeyValue_Value_DEFAULT string + func (p *KeyValue) GetValue() string { - if !p.IsSetValue() { - return KeyValue_Value_DEFAULT - } -return *p.Value + if !p.IsSetValue() { + return KeyValue_Value_DEFAULT + } + return *p.Value } func (p *KeyValue) IsSetValue() bool { - return p.Value != nil + return p.Value != nil } func (p *KeyValue) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetKey bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRING { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetKey = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRING { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetKey{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Key is not set")); - } - return nil -} - -func (p *KeyValue) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.Key = v -} - return nil -} - -func (p *KeyValue) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.Value = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetKey bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRING 
{ + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetKey = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRING { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetKey { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Key is not set")) + } + return nil +} + +func (p *KeyValue) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Key = v + } + return nil +} + +func (p *KeyValue) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Value = &v + } + return nil } func (p *KeyValue) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "KeyValue"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "KeyValue"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *KeyValue) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "key", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:key: ", p), err) } - if err := oprot.WriteString(ctx, string(p.Key)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.key (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:key: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "key", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:key: ", p), err) + } + if err := oprot.WriteString(ctx, string(p.Key)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.key (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:key: ", p), err) + } + return err } func (p *KeyValue) 
writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetValue() { - if err := oprot.WriteFieldBegin(ctx, "value", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:value: ", p), err) } - if err := oprot.WriteString(ctx, string(*p.Value)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.value (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:value: ", p), err) } - } - return err + if p.IsSetValue() { + if err := oprot.WriteFieldBegin(ctx, "value", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:value: ", p), err) + } + if err := oprot.WriteString(ctx, string(*p.Value)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.value (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:value: ", p), err) + } + } + return err } func (p *KeyValue) Equals(other *KeyValue) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Key != other.Key { return false } - if p.Value != other.Value { - if p.Value == nil || other.Value == nil { - return false - } - if (*p.Value) != (*other.Value) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Key != other.Key { + return false + } + if p.Value != other.Value { + if p.Value == nil || other.Value == nil { + return false + } + if (*p.Value) != (*other.Value) { + return false + } + } + return true } func (p *KeyValue) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("KeyValue(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("KeyValue(%+v)", *p) } func (p *KeyValue) Validate() error { - return nil + return nil } + // Wrapper struct to specify sort order -// +// // Attributes: -// - ColumnIdx: The column index (in this row group) * -// - Descending: If true, indicates this column is sorted in descending order. * -// - NullsFirst: If true, nulls will come before non-null values, otherwise, +// - ColumnIdx: The column index (in this row group) * +// - Descending: If true, indicates this column is sorted in descending order. * +// - NullsFirst: If true, nulls will come before non-null values, otherwise, +// // nulls go at the end. 
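A minimal usage sketch for the SortingColumn type declared just below (not part of the patch): the structs in this file are plain Thrift-generated types, so a round trip through an in-memory transport shows how their generated Write, Read, and Equals methods fit together. It assumes it compiles alongside the generated parquet package so SortingColumn resolves; the compact protocol is what Parquet encodes its metadata with, and the function name here is illustrative only.

// Hypothetical example; assumes it lives in (or dot-imports) the generated
// parquet package from this diff so SortingColumn/NewSortingColumn resolve.
package parquet

import (
	"context"
	"fmt"

	"github.com/apache/thrift/lib/go/thrift"
)

func ExampleSortingColumnRoundTrip() error {
	ctx := context.Background()

	// All three SortingColumn fields are required; if any isset flag is
	// still false after Read, it returns an INVALID_DATA protocol
	// exception, mirroring the isset checks visible in this diff.
	in := NewSortingColumn()
	in.ColumnIdx = 2
	in.Descending = true
	in.NullsFirst = false

	// Serialize into an in-memory transport using Thrift's compact
	// protocol, the encoding Parquet uses for file metadata.
	buf := thrift.NewTMemoryBuffer()
	prot := thrift.NewTCompactProtocolConf(buf, &thrift.TConfiguration{})
	if err := in.Write(ctx, prot); err != nil {
		return err
	}

	// Deserialize from the same buffer and compare with the generated
	// Equals helper.
	out := NewSortingColumn()
	if err := out.Read(ctx, prot); err != nil {
		return err
	}
	fmt.Println(in.Equals(out)) // prints: true
	return nil
}

The same pattern applies to every struct in this file, since the generator emits the identical Read/Write/Equals surface for each.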
type SortingColumn struct { - ColumnIdx int32 `thrift:"column_idx,1,required" db:"column_idx" json:"column_idx"` - Descending bool `thrift:"descending,2,required" db:"descending" json:"descending"` - NullsFirst bool `thrift:"nulls_first,3,required" db:"nulls_first" json:"nulls_first"` + ColumnIdx int32 `thrift:"column_idx,1,required" db:"column_idx" json:"column_idx"` + Descending bool `thrift:"descending,2,required" db:"descending" json:"descending"` + NullsFirst bool `thrift:"nulls_first,3,required" db:"nulls_first" json:"nulls_first"` } func NewSortingColumn() *SortingColumn { - return &SortingColumn{} + return &SortingColumn{} } - func (p *SortingColumn) GetColumnIdx() int32 { - return p.ColumnIdx + return p.ColumnIdx } func (p *SortingColumn) GetDescending() bool { - return p.Descending + return p.Descending } func (p *SortingColumn) GetNullsFirst() bool { - return p.NullsFirst + return p.NullsFirst } func (p *SortingColumn) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetColumnIdx bool = false; - var issetDescending bool = false; - var issetNullsFirst bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetColumnIdx = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetDescending = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetNullsFirst = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetColumnIdx{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field ColumnIdx is not set")); - } - if !issetDescending{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Descending is not set")); - } - if !issetNullsFirst{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullsFirst is not set")); - } - return nil -} - -func (p *SortingColumn) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.ColumnIdx = v -} - return nil -} - -func (p *SortingColumn) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.Descending = v -} - return nil -} - -func (p *SortingColumn) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return 
thrift.PrependError("error reading field 3: ", err) -} else { - p.NullsFirst = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetColumnIdx bool = false + var issetDescending bool = false + var issetNullsFirst bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetColumnIdx = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetDescending = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetNullsFirst = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetColumnIdx { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field ColumnIdx is not set")) + } + if !issetDescending { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Descending is not set")) + } + if !issetNullsFirst { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullsFirst is not set")) + } + return nil +} + +func (p *SortingColumn) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.ColumnIdx = v + } + return nil +} + +func (p *SortingColumn) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Descending = v + } + return nil +} + +func (p *SortingColumn) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NullsFirst = v + } + return nil } func (p *SortingColumn) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "SortingColumn"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "SortingColumn"); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *SortingColumn) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "column_idx", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:column_idx: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.ColumnIdx)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.column_idx (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:column_idx: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "column_idx", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:column_idx: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.ColumnIdx)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.column_idx (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:column_idx: ", p), err) + } + return err } func (p *SortingColumn) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "descending", thrift.BOOL, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:descending: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.Descending)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.descending (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:descending: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "descending", thrift.BOOL, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:descending: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(p.Descending)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.descending (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:descending: ", p), err) + } + return err } func (p *SortingColumn) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "nulls_first", thrift.BOOL, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:nulls_first: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.NullsFirst)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.nulls_first (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:nulls_first: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "nulls_first", thrift.BOOL, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:nulls_first: ", p), err) + } + if err := 
oprot.WriteBool(ctx, bool(p.NullsFirst)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.nulls_first (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:nulls_first: ", p), err) + } + return err } func (p *SortingColumn) Equals(other *SortingColumn) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.ColumnIdx != other.ColumnIdx { return false } - if p.Descending != other.Descending { return false } - if p.NullsFirst != other.NullsFirst { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.ColumnIdx != other.ColumnIdx { + return false + } + if p.Descending != other.Descending { + return false + } + if p.NullsFirst != other.NullsFirst { + return false + } + return true } func (p *SortingColumn) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("SortingColumn(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("SortingColumn(%+v)", *p) } func (p *SortingColumn) Validate() error { - return nil + return nil } + // statistics of a given page type and encoding -// +// // Attributes: -// - PageType: the page type (data/dic/...) * -// - Encoding: encoding of the page * -// - Count: number of pages of this type with this encoding * +// - PageType: the page type (data/dic/...) * +// - Encoding: encoding of the page * +// - Count: number of pages of this type with this encoding * type PageEncodingStats struct { - PageType PageType `thrift:"page_type,1,required" db:"page_type" json:"page_type"` - Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` - Count int32 `thrift:"count,3,required" db:"count" json:"count"` + PageType PageType `thrift:"page_type,1,required" db:"page_type" json:"page_type"` + Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` + Count int32 `thrift:"count,3,required" db:"count" json:"count"` } func NewPageEncodingStats() *PageEncodingStats { - return &PageEncodingStats{} + return &PageEncodingStats{} } - func (p *PageEncodingStats) GetPageType() PageType { - return p.PageType + return p.PageType } func (p *PageEncodingStats) GetEncoding() Encoding { - return p.Encoding + return p.Encoding } func (p *PageEncodingStats) GetCount() int32 { - return p.Count + return p.Count } func (p *PageEncodingStats) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetPageType bool = false; - var issetEncoding bool = false; - var issetCount bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetPageType = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetEncoding = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I32 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err 
- } - issetCount = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetPageType{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageType is not set")); - } - if !issetEncoding{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")); - } - if !issetCount{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Count is not set")); - } - return nil -} - -func (p *PageEncodingStats) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - temp := PageType(v) - p.PageType = temp -} - return nil -} - -func (p *PageEncodingStats) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - temp := Encoding(v) - p.Encoding = temp -} - return nil -} - -func (p *PageEncodingStats) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.Count = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetPageType bool = false + var issetEncoding bool = false + var issetCount bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetPageType = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetEncoding = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I32 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetCount = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetPageType { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageType is not set")) + } + if !issetEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + if !issetCount { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Count is not set")) + } + return nil +} + +func (p *PageEncodingStats) ReadField1(ctx 
context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := PageType(v) + p.PageType = temp + } + return nil +} + +func (p *PageEncodingStats) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *PageEncodingStats) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.Count = v + } + return nil } func (p *PageEncodingStats) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "PageEncodingStats"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "PageEncodingStats"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *PageEncodingStats) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "page_type", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.PageType)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.page_type (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_type: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "page_type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.PageType)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.page_type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_type: ", p), err) + } + return err } func (p *PageEncodingStats) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) + } + return err } func (p *PageEncodingStats) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "count", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:count: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Count)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.count (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:count: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "count", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:count: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Count)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.count (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:count: ", p), err) + } + return err } func (p *PageEncodingStats) Equals(other *PageEncodingStats) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.PageType != other.PageType { return false } - if p.Encoding != other.Encoding { return false } - if p.Count != other.Count { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.PageType != other.PageType { + return false + } + if p.Encoding != other.Encoding { + return false + } + if p.Count != other.Count { + return false + } + return true } func (p *PageEncodingStats) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("PageEncodingStats(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("PageEncodingStats(%+v)", *p) } func (p *PageEncodingStats) Validate() error { - return nil + return nil } + // Description for column metadata -// +// // Attributes: -// - Type: Type of this column * -// - Encodings: Set of all encodings used for this column. The purpose is to validate +// - Type: Type of this column * +// - Encodings: Set of all encodings used for this column. The purpose is to validate +// // whether we can decode those pages. 
* -// - PathInSchema: Path in schema * -// - Codec: Compression codec * -// - NumValues: Number of values in this column * -// - TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) * -// - TotalCompressedSize: total byte size of all compressed, and potentially encrypted, pages +// - PathInSchema: Path in schema * +// - Codec: Compression codec * +// - NumValues: Number of values in this column * +// - TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) * +// - TotalCompressedSize: total byte size of all compressed, and potentially encrypted, pages +// // in this column chunk (including the headers) * -// - KeyValueMetadata: Optional key/value metadata * -// - DataPageOffset: Byte offset from beginning of file to first data page * -// - IndexPageOffset: Byte offset from beginning of file to root index page * -// - DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page * -// - Statistics: optional statistics for this column chunk -// - EncodingStats: Set of all encodings used for pages in this column chunk. +// - KeyValueMetadata: Optional key/value metadata * +// - DataPageOffset: Byte offset from beginning of file to first data page * +// - IndexPageOffset: Byte offset from beginning of file to root index page * +// - DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page * +// - Statistics: optional statistics for this column chunk +// - EncodingStats: Set of all encodings used for pages in this column chunk. +// // This information can be used to determine if all data pages are // dictionary encoded for example * -// - BloomFilterOffset: Byte offset from beginning of file to Bloom filter data. * +// - BloomFilterOffset: Byte offset from beginning of file to Bloom filter data. 
* type ColumnMetaData struct { - Type Type `thrift:"type,1,required" db:"type" json:"type"` - Encodings []Encoding `thrift:"encodings,2,required" db:"encodings" json:"encodings"` - PathInSchema []string `thrift:"path_in_schema,3,required" db:"path_in_schema" json:"path_in_schema"` - Codec CompressionCodec `thrift:"codec,4,required" db:"codec" json:"codec"` - NumValues int64 `thrift:"num_values,5,required" db:"num_values" json:"num_values"` - TotalUncompressedSize int64 `thrift:"total_uncompressed_size,6,required" db:"total_uncompressed_size" json:"total_uncompressed_size"` - TotalCompressedSize int64 `thrift:"total_compressed_size,7,required" db:"total_compressed_size" json:"total_compressed_size"` - KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,8" db:"key_value_metadata" json:"key_value_metadata,omitempty"` - DataPageOffset int64 `thrift:"data_page_offset,9,required" db:"data_page_offset" json:"data_page_offset"` - IndexPageOffset *int64 `thrift:"index_page_offset,10" db:"index_page_offset" json:"index_page_offset,omitempty"` - DictionaryPageOffset *int64 `thrift:"dictionary_page_offset,11" db:"dictionary_page_offset" json:"dictionary_page_offset,omitempty"` - Statistics *Statistics `thrift:"statistics,12" db:"statistics" json:"statistics,omitempty"` - EncodingStats []*PageEncodingStats `thrift:"encoding_stats,13" db:"encoding_stats" json:"encoding_stats,omitempty"` - BloomFilterOffset *int64 `thrift:"bloom_filter_offset,14" db:"bloom_filter_offset" json:"bloom_filter_offset,omitempty"` + Type Type `thrift:"type,1,required" db:"type" json:"type"` + Encodings []Encoding `thrift:"encodings,2,required" db:"encodings" json:"encodings"` + PathInSchema []string `thrift:"path_in_schema,3,required" db:"path_in_schema" json:"path_in_schema"` + Codec CompressionCodec `thrift:"codec,4,required" db:"codec" json:"codec"` + NumValues int64 `thrift:"num_values,5,required" db:"num_values" json:"num_values"` + TotalUncompressedSize int64 `thrift:"total_uncompressed_size,6,required" db:"total_uncompressed_size" json:"total_uncompressed_size"` + TotalCompressedSize int64 `thrift:"total_compressed_size,7,required" db:"total_compressed_size" json:"total_compressed_size"` + KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,8" db:"key_value_metadata" json:"key_value_metadata,omitempty"` + DataPageOffset int64 `thrift:"data_page_offset,9,required" db:"data_page_offset" json:"data_page_offset"` + IndexPageOffset *int64 `thrift:"index_page_offset,10" db:"index_page_offset" json:"index_page_offset,omitempty"` + DictionaryPageOffset *int64 `thrift:"dictionary_page_offset,11" db:"dictionary_page_offset" json:"dictionary_page_offset,omitempty"` + Statistics *Statistics `thrift:"statistics,12" db:"statistics" json:"statistics,omitempty"` + EncodingStats []*PageEncodingStats `thrift:"encoding_stats,13" db:"encoding_stats" json:"encoding_stats,omitempty"` + BloomFilterOffset *int64 `thrift:"bloom_filter_offset,14" db:"bloom_filter_offset" json:"bloom_filter_offset,omitempty"` } func NewColumnMetaData() *ColumnMetaData { - return &ColumnMetaData{} + return &ColumnMetaData{} } - func (p *ColumnMetaData) GetType() Type { - return p.Type + return p.Type } func (p *ColumnMetaData) GetEncodings() []Encoding { - return p.Encodings + return p.Encodings } func (p *ColumnMetaData) GetPathInSchema() []string { - return p.PathInSchema + return p.PathInSchema } func (p *ColumnMetaData) GetCodec() CompressionCodec { - return p.Codec + return p.Codec } func (p *ColumnMetaData) GetNumValues() int64 { - return 
p.NumValues + return p.NumValues } func (p *ColumnMetaData) GetTotalUncompressedSize() int64 { - return p.TotalUncompressedSize + return p.TotalUncompressedSize } func (p *ColumnMetaData) GetTotalCompressedSize() int64 { - return p.TotalCompressedSize + return p.TotalCompressedSize } + var ColumnMetaData_KeyValueMetadata_DEFAULT []*KeyValue func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue { - return p.KeyValueMetadata + return p.KeyValueMetadata } func (p *ColumnMetaData) GetDataPageOffset() int64 { - return p.DataPageOffset + return p.DataPageOffset } + var ColumnMetaData_IndexPageOffset_DEFAULT int64 + func (p *ColumnMetaData) GetIndexPageOffset() int64 { - if !p.IsSetIndexPageOffset() { - return ColumnMetaData_IndexPageOffset_DEFAULT - } -return *p.IndexPageOffset + if !p.IsSetIndexPageOffset() { + return ColumnMetaData_IndexPageOffset_DEFAULT + } + return *p.IndexPageOffset } + var ColumnMetaData_DictionaryPageOffset_DEFAULT int64 + func (p *ColumnMetaData) GetDictionaryPageOffset() int64 { - if !p.IsSetDictionaryPageOffset() { - return ColumnMetaData_DictionaryPageOffset_DEFAULT - } -return *p.DictionaryPageOffset + if !p.IsSetDictionaryPageOffset() { + return ColumnMetaData_DictionaryPageOffset_DEFAULT + } + return *p.DictionaryPageOffset } + var ColumnMetaData_Statistics_DEFAULT *Statistics + func (p *ColumnMetaData) GetStatistics() *Statistics { - if !p.IsSetStatistics() { - return ColumnMetaData_Statistics_DEFAULT - } -return p.Statistics + if !p.IsSetStatistics() { + return ColumnMetaData_Statistics_DEFAULT + } + return p.Statistics } + var ColumnMetaData_EncodingStats_DEFAULT []*PageEncodingStats func (p *ColumnMetaData) GetEncodingStats() []*PageEncodingStats { - return p.EncodingStats + return p.EncodingStats } + var ColumnMetaData_BloomFilterOffset_DEFAULT int64 + func (p *ColumnMetaData) GetBloomFilterOffset() int64 { - if !p.IsSetBloomFilterOffset() { - return ColumnMetaData_BloomFilterOffset_DEFAULT - } -return *p.BloomFilterOffset + if !p.IsSetBloomFilterOffset() { + return ColumnMetaData_BloomFilterOffset_DEFAULT + } + return *p.BloomFilterOffset } func (p *ColumnMetaData) IsSetKeyValueMetadata() bool { - return p.KeyValueMetadata != nil + return p.KeyValueMetadata != nil } func (p *ColumnMetaData) IsSetIndexPageOffset() bool { - return p.IndexPageOffset != nil + return p.IndexPageOffset != nil } func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool { - return p.DictionaryPageOffset != nil + return p.DictionaryPageOffset != nil } func (p *ColumnMetaData) IsSetStatistics() bool { - return p.Statistics != nil + return p.Statistics != nil } func (p *ColumnMetaData) IsSetEncodingStats() bool { - return p.EncodingStats != nil + return p.EncodingStats != nil } func (p *ColumnMetaData) IsSetBloomFilterOffset() bool { - return p.BloomFilterOffset != nil + return p.BloomFilterOffset != nil } func (p *ColumnMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetType bool = false; - var issetEncodings bool = false; - var issetPathInSchema bool = false; - var issetCodec bool = false; - var issetNumValues bool = false; - var issetTotalUncompressedSize bool = false; - var issetTotalCompressedSize bool = false; - var issetDataPageOffset bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), 
err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetType = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.LIST { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetEncodings = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.LIST { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetPathInSchema = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I32 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetCodec = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.I64 { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - issetNumValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.I64 { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - issetTotalUncompressedSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.I64 { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - issetTotalCompressedSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 8: - if fieldTypeId == thrift.LIST { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 9: - if fieldTypeId == thrift.I64 { - if err := p.ReadField9(ctx, iprot); err != nil { - return err - } - issetDataPageOffset = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 10: - if fieldTypeId == thrift.I64 { - if err := p.ReadField10(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 11: - if fieldTypeId == thrift.I64 { - if err := p.ReadField11(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 12: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField12(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 13: - if fieldTypeId == thrift.LIST { - if err := p.ReadField13(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 14: - if fieldTypeId == thrift.I64 { - if err := p.ReadField14(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetType{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")); - } - if !issetEncodings{ - return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encodings is not set")); - } - if !issetPathInSchema{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set")); - } - if !issetCodec{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Codec is not set")); - } - if !issetNumValues{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")); - } - if !issetTotalUncompressedSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalUncompressedSize is not set")); - } - if !issetTotalCompressedSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalCompressedSize is not set")); - } - if !issetDataPageOffset{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DataPageOffset is not set")); - } - return nil -} - -func (p *ColumnMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - temp := Type(v) - p.Type = temp -} - return nil -} - -func (p *ColumnMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]Encoding, 0, size) - p.Encodings = tSlice - for i := 0; i < size; i ++ { -var _elem0 Encoding - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - temp := Encoding(v) - _elem0 = temp -} - p.Encodings = append(p.Encodings, _elem0) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]string, 0, size) - p.PathInSchema = tSlice - for i := 0; i < size; i ++ { -var _elem1 string - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem1 = v -} - p.PathInSchema = append(p.PathInSchema, _elem1) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - temp := CompressionCodec(v) - p.Codec = temp -} - return nil -} - -func (p *ColumnMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 5: ", err) -} else { - p.NumValues = v -} - return nil -} - -func (p *ColumnMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - p.TotalUncompressedSize = v -} - return nil -} - -func (p *ColumnMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := 
iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 7: ", err) -} else { - p.TotalCompressedSize = v -} - return nil -} - -func (p *ColumnMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*KeyValue, 0, size) - p.KeyValueMetadata = tSlice - for i := 0; i < size; i ++ { - _elem2 := &KeyValue{} - if err := _elem2.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem2), err) - } - p.KeyValueMetadata = append(p.KeyValueMetadata, _elem2) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 9: ", err) -} else { - p.DataPageOffset = v -} - return nil -} - -func (p *ColumnMetaData) ReadField10(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 10: ", err) -} else { - p.IndexPageOffset = &v -} - return nil -} - -func (p *ColumnMetaData) ReadField11(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 11: ", err) -} else { - p.DictionaryPageOffset = &v -} - return nil -} - -func (p *ColumnMetaData) ReadField12(ctx context.Context, iprot thrift.TProtocol) error { - p.Statistics = &Statistics{} - if err := p.Statistics.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) - } - return nil -} - -func (p *ColumnMetaData) ReadField13(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*PageEncodingStats, 0, size) - p.EncodingStats = tSlice - for i := 0; i < size; i ++ { - _elem3 := &PageEncodingStats{} - if err := _elem3.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem3), err) - } - p.EncodingStats = append(p.EncodingStats, _elem3) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField14(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 14: ", err) -} else { - p.BloomFilterOffset = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetType bool = false + var issetEncodings bool = false + var issetPathInSchema bool = false + var issetCodec bool = false + var issetNumValues bool = false + var issetTotalUncompressedSize bool = false + var issetTotalCompressedSize bool = false + var issetDataPageOffset bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := 
p.ReadField1(ctx, iprot); err != nil { + return err + } + issetType = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.LIST { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetEncodings = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.LIST { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetPathInSchema = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I32 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetCodec = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.I64 { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + issetNumValues = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.I64 { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + issetTotalUncompressedSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.I64 { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + issetTotalCompressedSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.LIST { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 9: + if fieldTypeId == thrift.I64 { + if err := p.ReadField9(ctx, iprot); err != nil { + return err + } + issetDataPageOffset = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 10: + if fieldTypeId == thrift.I64 { + if err := p.ReadField10(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 11: + if fieldTypeId == thrift.I64 { + if err := p.ReadField11(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 12: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField12(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 13: + if fieldTypeId == thrift.LIST { + if err := p.ReadField13(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 14: + if fieldTypeId == thrift.I64 { + if err := p.ReadField14(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetType { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")) + } + if !issetEncodings { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encodings is not set")) + } + if !issetPathInSchema { + return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set")) + } + if !issetCodec { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Codec is not set")) + } + if !issetNumValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) + } + if !issetTotalUncompressedSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalUncompressedSize is not set")) + } + if !issetTotalCompressedSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalCompressedSize is not set")) + } + if !issetDataPageOffset { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DataPageOffset is not set")) + } + return nil +} + +func (p *ColumnMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := Type(v) + p.Type = temp + } + return nil +} + +func (p *ColumnMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]Encoding, 0, size) + p.Encodings = tSlice + for i := 0; i < size; i++ { + var _elem0 Encoding + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + temp := Encoding(v) + _elem0 = temp + } + p.Encodings = append(p.Encodings, _elem0) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]string, 0, size) + p.PathInSchema = tSlice + for i := 0; i < size; i++ { + var _elem1 string + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem1 = v + } + p.PathInSchema = append(p.PathInSchema, _elem1) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + temp := CompressionCodec(v) + p.Codec = temp + } + return nil +} + +func (p *ColumnMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.NumValues = v + } + return nil +} + +func (p *ColumnMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.TotalUncompressedSize = v + } + return nil +} + +func (p *ColumnMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 7: ", err) + } else { + p.TotalCompressedSize = v + } + return nil +} + 
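The ReadFieldN helpers above follow the Thrift Go generator's usual pattern: required scalars (fields 1 through 9) are written straight into value fields and tracked with isset flags, while optional scalars such as index_page_offset (field 10) are stored through pointers so that "absent on the wire" stays distinguishable from a zero value. A minimal sketch of that pointer-plus-default convention, using a hypothetical TwoField struct rather than the generated ColumnMetaData, might look like this:

package main

import "fmt"

// TwoField mimics the generated shape: one required field, one optional one.
type TwoField struct {
	NumValues int64  // required: always set after a successful Read
	Offset    *int64 // optional: nil means the field was absent on the wire
}

// Package-level default returned by the getter when the field is unset,
// analogous to the ColumnMetaData_*_DEFAULT variables above.
var TwoField_Offset_DEFAULT int64

func (p *TwoField) IsSetOffset() bool { return p.Offset != nil }

// GetOffset dereferences when set and falls back to the default otherwise,
// the same branch structure used by GetIndexPageOffset and friends.
func (p *TwoField) GetOffset() int64 {
	if !p.IsSetOffset() {
		return TwoField_Offset_DEFAULT
	}
	return *p.Offset
}

func main() {
	v := int64(4096)
	set := TwoField{NumValues: 10, Offset: &v}
	unset := TwoField{NumValues: 10}
	fmt.Println(set.GetOffset(), unset.GetOffset()) // 4096 0
}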
+func (p *ColumnMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*KeyValue, 0, size) + p.KeyValueMetadata = tSlice + for i := 0; i < size; i++ { + _elem2 := &KeyValue{} + if err := _elem2.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem2), err) + } + p.KeyValueMetadata = append(p.KeyValueMetadata, _elem2) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 9: ", err) + } else { + p.DataPageOffset = v + } + return nil +} + +func (p *ColumnMetaData) ReadField10(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 10: ", err) + } else { + p.IndexPageOffset = &v + } + return nil +} + +func (p *ColumnMetaData) ReadField11(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 11: ", err) + } else { + p.DictionaryPageOffset = &v + } + return nil +} + +func (p *ColumnMetaData) ReadField12(ctx context.Context, iprot thrift.TProtocol) error { + p.Statistics = &Statistics{} + if err := p.Statistics.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) + } + return nil +} + +func (p *ColumnMetaData) ReadField13(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*PageEncodingStats, 0, size) + p.EncodingStats = tSlice + for i := 0; i < size; i++ { + _elem3 := &PageEncodingStats{} + if err := _elem3.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem3), err) + } + p.EncodingStats = append(p.EncodingStats, _elem3) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnMetaData) ReadField14(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 14: ", err) + } else { + p.BloomFilterOffset = &v + } + return nil } func (p *ColumnMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "ColumnMetaData"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - if err := p.writeField9(ctx, oprot); err != nil { return err } - if err := 
p.writeField10(ctx, oprot); err != nil { return err } - if err := p.writeField11(ctx, oprot); err != nil { return err } - if err := p.writeField12(ctx, oprot); err != nil { return err } - if err := p.writeField13(ctx, oprot); err != nil { return err } - if err := p.writeField14(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "ColumnMetaData"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + if err := p.writeField9(ctx, oprot); err != nil { + return err + } + if err := p.writeField10(ctx, oprot); err != nil { + return err + } + if err := p.writeField11(ctx, oprot); err != nil { + return err + } + if err := p.writeField12(ctx, oprot); err != nil { + return err + } + if err := p.writeField13(ctx, oprot); err != nil { + return err + } + if err := p.writeField14(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *ColumnMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Type)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Type)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) + } + return err } func (p *ColumnMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "encodings", thrift.LIST, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encodings: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.I32, len(p.Encodings)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.Encodings { - if err := oprot.WriteI32(ctx, int32(v)); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encodings: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "encodings", thrift.LIST, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encodings: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.I32, len(p.Encodings)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.Encodings { + if err := oprot.WriteI32(ctx, int32(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encodings: ", p), err) + } + return err } func (p *ColumnMetaData) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "path_in_schema", thrift.LIST, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:path_in_schema: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.PathInSchema)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.PathInSchema { - if err := oprot.WriteString(ctx, string(v)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:path_in_schema: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "path_in_schema", thrift.LIST, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:path_in_schema: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.PathInSchema)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.PathInSchema { + if err := oprot.WriteString(ctx, string(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:path_in_schema: ", p), err) + } + return err } func (p *ColumnMetaData) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "codec", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:codec: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Codec)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.codec (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:codec: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "codec", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:codec: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Codec)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.codec (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:codec: ", p), err) + } + return err } func (p *ColumnMetaData) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I64, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_values: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.NumValues)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_values (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I64, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_values: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.NumValues)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_values (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_values: ", p), err) + } + return err } func (p *ColumnMetaData) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "total_uncompressed_size", thrift.I64, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_uncompressed_size: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.TotalUncompressedSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_uncompressed_size (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_uncompressed_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "total_uncompressed_size", thrift.I64, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_uncompressed_size: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.TotalUncompressedSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.total_uncompressed_size (6) field write error: ", p), err) + } + if err := 
oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_uncompressed_size: ", p), err) + } + return err } func (p *ColumnMetaData) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "total_compressed_size", thrift.I64, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:total_compressed_size: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.TotalCompressedSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (7) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:total_compressed_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "total_compressed_size", thrift.I64, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:total_compressed_size: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.TotalCompressedSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:total_compressed_size: ", p), err) + } + return err } func (p *ColumnMetaData) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetKeyValueMetadata() { - if err := oprot.WriteFieldBegin(ctx, "key_value_metadata", thrift.LIST, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:key_value_metadata: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.KeyValueMetadata)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.KeyValueMetadata { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:key_value_metadata: ", p), err) } - } - return err + if p.IsSetKeyValueMetadata() { + if err := oprot.WriteFieldBegin(ctx, "key_value_metadata", thrift.LIST, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:key_value_metadata: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.KeyValueMetadata)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.KeyValueMetadata { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:key_value_metadata: ", p), err) + } + } + return err } func (p *ColumnMetaData) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "data_page_offset", thrift.I64, 9); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:data_page_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.DataPageOffset)); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T.data_page_offset (9) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 9:data_page_offset: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "data_page_offset", thrift.I64, 9); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:data_page_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.DataPageOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.data_page_offset (9) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 9:data_page_offset: ", p), err) + } + return err } func (p *ColumnMetaData) writeField10(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetIndexPageOffset() { - if err := oprot.WriteFieldBegin(ctx, "index_page_offset", thrift.I64, 10); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:index_page_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.IndexPageOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.index_page_offset (10) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 10:index_page_offset: ", p), err) } - } - return err + if p.IsSetIndexPageOffset() { + if err := oprot.WriteFieldBegin(ctx, "index_page_offset", thrift.I64, 10); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:index_page_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.IndexPageOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.index_page_offset (10) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 10:index_page_offset: ", p), err) + } + } + return err } func (p *ColumnMetaData) writeField11(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDictionaryPageOffset() { - if err := oprot.WriteFieldBegin(ctx, "dictionary_page_offset", thrift.I64, 11); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:dictionary_page_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.DictionaryPageOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.dictionary_page_offset (11) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 11:dictionary_page_offset: ", p), err) } - } - return err + if p.IsSetDictionaryPageOffset() { + if err := oprot.WriteFieldBegin(ctx, "dictionary_page_offset", thrift.I64, 11); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:dictionary_page_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.DictionaryPageOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.dictionary_page_offset (11) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 11:dictionary_page_offset: ", p), err) + } + } + return err } func (p *ColumnMetaData) writeField12(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetStatistics() { - if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 12); err != nil 
{ - return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:statistics: ", p), err) } - if err := p.Statistics.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 12:statistics: ", p), err) } - } - return err + if p.IsSetStatistics() { + if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 12); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:statistics: ", p), err) + } + if err := p.Statistics.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 12:statistics: ", p), err) + } + } + return err } func (p *ColumnMetaData) writeField13(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetEncodingStats() { - if err := oprot.WriteFieldBegin(ctx, "encoding_stats", thrift.LIST, 13); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:encoding_stats: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.EncodingStats)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.EncodingStats { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 13:encoding_stats: ", p), err) } - } - return err + if p.IsSetEncodingStats() { + if err := oprot.WriteFieldBegin(ctx, "encoding_stats", thrift.LIST, 13); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:encoding_stats: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.EncodingStats)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.EncodingStats { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 13:encoding_stats: ", p), err) + } + } + return err } func (p *ColumnMetaData) writeField14(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetBloomFilterOffset() { - if err := oprot.WriteFieldBegin(ctx, "bloom_filter_offset", thrift.I64, 14); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 14:bloom_filter_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.BloomFilterOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.bloom_filter_offset (14) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 14:bloom_filter_offset: ", p), err) } - } - return err + if p.IsSetBloomFilterOffset() { + if err := oprot.WriteFieldBegin(ctx, "bloom_filter_offset", thrift.I64, 14); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 14:bloom_filter_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.BloomFilterOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.bloom_filter_offset (14) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 14:bloom_filter_offset: ", p), err) + } + } + return err } func (p *ColumnMetaData) Equals(other *ColumnMetaData) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Type != other.Type { return false } - if len(p.Encodings) != len(other.Encodings) { return false } - for i, _tgt := range p.Encodings { - _src4 := other.Encodings[i] - if _tgt != _src4 { return false } - } - if len(p.PathInSchema) != len(other.PathInSchema) { return false } - for i, _tgt := range p.PathInSchema { - _src5 := other.PathInSchema[i] - if _tgt != _src5 { return false } - } - if p.Codec != other.Codec { return false } - if p.NumValues != other.NumValues { return false } - if p.TotalUncompressedSize != other.TotalUncompressedSize { return false } - if p.TotalCompressedSize != other.TotalCompressedSize { return false } - if len(p.KeyValueMetadata) != len(other.KeyValueMetadata) { return false } - for i, _tgt := range p.KeyValueMetadata { - _src6 := other.KeyValueMetadata[i] - if !_tgt.Equals(_src6) { return false } - } - if p.DataPageOffset != other.DataPageOffset { return false } - if p.IndexPageOffset != other.IndexPageOffset { - if p.IndexPageOffset == nil || other.IndexPageOffset == nil { - return false - } - if (*p.IndexPageOffset) != (*other.IndexPageOffset) { return false } - } - if p.DictionaryPageOffset != other.DictionaryPageOffset { - if p.DictionaryPageOffset == nil || other.DictionaryPageOffset == nil { - return false - } - if (*p.DictionaryPageOffset) != (*other.DictionaryPageOffset) { return false } - } - if !p.Statistics.Equals(other.Statistics) { return false } - if len(p.EncodingStats) != len(other.EncodingStats) { return false } - for i, _tgt := range p.EncodingStats { - _src7 := other.EncodingStats[i] - if !_tgt.Equals(_src7) { return false } - } - if p.BloomFilterOffset != other.BloomFilterOffset { - if p.BloomFilterOffset == nil || other.BloomFilterOffset == nil { - return false - } - if (*p.BloomFilterOffset) != (*other.BloomFilterOffset) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Type != other.Type { + return false + } + if len(p.Encodings) != len(other.Encodings) { + return false + } + for i, _tgt := range p.Encodings { + _src4 := other.Encodings[i] + if _tgt != _src4 { + return false + } + } + if len(p.PathInSchema) != len(other.PathInSchema) { + return false + } + for i, _tgt := range p.PathInSchema { + _src5 := other.PathInSchema[i] + if _tgt != _src5 { + return false + } + } + if p.Codec != other.Codec { + return false + } + if p.NumValues != other.NumValues { + return false + } + if p.TotalUncompressedSize != other.TotalUncompressedSize { + return false + } + if p.TotalCompressedSize != other.TotalCompressedSize { + return false + } + if len(p.KeyValueMetadata) != len(other.KeyValueMetadata) { + return false + } + for i, _tgt := range p.KeyValueMetadata { + _src6 := other.KeyValueMetadata[i] + if !_tgt.Equals(_src6) { + return false + } + } + if p.DataPageOffset != other.DataPageOffset { + return false + } + if p.IndexPageOffset != other.IndexPageOffset { + if 
p.IndexPageOffset == nil || other.IndexPageOffset == nil {
+			return false
+		}
+		if (*p.IndexPageOffset) != (*other.IndexPageOffset) {
+			return false
+		}
+	}
+	if p.DictionaryPageOffset != other.DictionaryPageOffset {
+		if p.DictionaryPageOffset == nil || other.DictionaryPageOffset == nil {
+			return false
+		}
+		if (*p.DictionaryPageOffset) != (*other.DictionaryPageOffset) {
+			return false
+		}
+	}
+	if !p.Statistics.Equals(other.Statistics) {
+		return false
+	}
+	if len(p.EncodingStats) != len(other.EncodingStats) {
+		return false
+	}
+	for i, _tgt := range p.EncodingStats {
+		_src7 := other.EncodingStats[i]
+		if !_tgt.Equals(_src7) {
+			return false
+		}
+	}
+	if p.BloomFilterOffset != other.BloomFilterOffset {
+		if p.BloomFilterOffset == nil || other.BloomFilterOffset == nil {
+			return false
+		}
+		if (*p.BloomFilterOffset) != (*other.BloomFilterOffset) {
+			return false
+		}
+	}
+	return true
 }
 
 func (p *ColumnMetaData) String() string {
-  if p == nil {
-    return ""
-  }
-  return fmt.Sprintf("ColumnMetaData(%+v)", *p)
+	if p == nil {
+		return ""
+	}
+	return fmt.Sprintf("ColumnMetaData(%+v)", *p)
 }
 
 func (p *ColumnMetaData) Validate() error {
-  return nil
+	return nil
 }
+
 type EncryptionWithFooterKey struct {
 }
 
 func NewEncryptionWithFooterKey() *EncryptionWithFooterKey {
-  return &EncryptionWithFooterKey{}
+	return &EncryptionWithFooterKey{}
 }
 
 func (p *EncryptionWithFooterKey) Read(ctx context.Context, iprot thrift.TProtocol) error {
-  if _, err := iprot.ReadStructBegin(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
-  }
-
-
-  for {
-    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
-    if err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
-    }
-    if fieldTypeId == thrift.STOP { break; }
-    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-      return err
-    }
-    if err := iprot.ReadFieldEnd(ctx); err != nil {
-      return err
-    }
-  }
-  if err := iprot.ReadStructEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
-  }
-  return nil
+	if _, err := iprot.ReadStructBegin(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+	}
+
+	for {
+		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+		if err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+		}
+		if fieldTypeId == thrift.STOP {
+			break
+		}
+		if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+			return err
+		}
+		if err := iprot.ReadFieldEnd(ctx); err != nil {
+			return err
+		}
+	}
+	if err := iprot.ReadStructEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+	}
+	return nil
 }
 
 func (p *EncryptionWithFooterKey) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if err := oprot.WriteStructBegin(ctx, "EncryptionWithFooterKey"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if err := oprot.WriteStructBegin(ctx, "EncryptionWithFooterKey"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }
 
 func (p *EncryptionWithFooterKey) Equals(other *EncryptionWithFooterKey) bool {
-  if p == other {
-    return true
-  } else if p == nil || other == nil {
-    return false
-  }
-  return true
+	if p == other {
+		return true
+	} else if p == nil || other == nil {
+		return false
+	}
+	return true
 }
 
 func (p *EncryptionWithFooterKey) String() string {
-  if p == nil {
-    return ""
-  }
-  return fmt.Sprintf("EncryptionWithFooterKey(%+v)", *p)
+	if p == nil {
+		return ""
+	}
+	return fmt.Sprintf("EncryptionWithFooterKey(%+v)", *p)
 }
 
 func (p *EncryptionWithFooterKey) Validate() error {
-  return nil
+	return nil
 }
+
 // Attributes:
-//  - PathInSchema: Column path in schema *
-//  - KeyMetadata: Retrieval metadata of column encryption key *
+//   - PathInSchema: Column path in schema *
+//   - KeyMetadata: Retrieval metadata of column encryption key *
 type EncryptionWithColumnKey struct {
-  PathInSchema []string `thrift:"path_in_schema,1,required" db:"path_in_schema" json:"path_in_schema"`
-  KeyMetadata []byte `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"`
+	PathInSchema []string `thrift:"path_in_schema,1,required" db:"path_in_schema" json:"path_in_schema"`
+	KeyMetadata  []byte   `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"`
 }
 
 func NewEncryptionWithColumnKey() *EncryptionWithColumnKey {
-  return &EncryptionWithColumnKey{}
+	return &EncryptionWithColumnKey{}
 }
-
 
 func (p *EncryptionWithColumnKey) GetPathInSchema() []string {
-  return p.PathInSchema
+	return p.PathInSchema
 }
+
 var EncryptionWithColumnKey_KeyMetadata_DEFAULT []byte
 
 func (p *EncryptionWithColumnKey) GetKeyMetadata() []byte {
-  return p.KeyMetadata
+	return p.KeyMetadata
 }
 
 func (p *EncryptionWithColumnKey) IsSetKeyMetadata() bool {
-  return p.KeyMetadata != nil
+	return p.KeyMetadata != nil
 }
 
 func (p *EncryptionWithColumnKey) Read(ctx context.Context, iprot thrift.TProtocol) error {
-  if _, err := iprot.ReadStructBegin(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
-  }
-
-  var issetPathInSchema bool = false;
-
-  for {
-    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
-    if err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
-    }
-    if fieldTypeId == thrift.STOP { break; }
-    switch fieldId {
-    case 1:
-      if fieldTypeId == thrift.LIST {
-        if err := p.ReadField1(ctx, iprot); err != nil {
-          return err
-        }
-        issetPathInSchema = true
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 2:
-      if fieldTypeId == thrift.STRING {
-        if err := p.ReadField2(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    default:
-      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-        return err
-      }
-    }
-    if err := iprot.ReadFieldEnd(ctx); err != nil {
-      return err
-    }
-  }
-  if err := iprot.ReadStructEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
-  }
-  if !issetPathInSchema{
-    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set"));
-  }
-  return nil
-}
-
-func (p *EncryptionWithColumnKey) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
-  _, size, err := iprot.ReadListBegin(ctx)
-  if err != nil {
-    return thrift.PrependError("error
reading list begin: ", err)
-  }
-  tSlice := make([]string, 0, size)
-  p.PathInSchema = tSlice
-  for i := 0; i < size; i ++ {
-var _elem8 string
-  if v, err := iprot.ReadString(ctx); err != nil {
-  return thrift.PrependError("error reading field 0: ", err)
-} else {
-  _elem8 = v
-}
-  p.PathInSchema = append(p.PathInSchema, _elem8)
-  }
-  if err := iprot.ReadListEnd(ctx); err != nil {
-  return thrift.PrependError("error reading list end: ", err)
-  }
-  return nil
-}
-
-func (p *EncryptionWithColumnKey) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadBinary(ctx); err != nil {
-  return thrift.PrependError("error reading field 2: ", err)
-} else {
-  p.KeyMetadata = v
-}
-  return nil
+	if _, err := iprot.ReadStructBegin(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+	}
+
+	var issetPathInSchema bool = false
+
+	for {
+		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+		if err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+		}
+		if fieldTypeId == thrift.STOP {
+			break
+		}
+		switch fieldId {
+		case 1:
+			if fieldTypeId == thrift.LIST {
+				if err := p.ReadField1(ctx, iprot); err != nil {
+					return err
+				}
+				issetPathInSchema = true
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 2:
+			if fieldTypeId == thrift.STRING {
+				if err := p.ReadField2(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		default:
+			if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+				return err
+			}
+		}
+		if err := iprot.ReadFieldEnd(ctx); err != nil {
+			return err
+		}
+	}
+	if err := iprot.ReadStructEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+	}
+	if !issetPathInSchema {
+		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set"))
+	}
+	return nil
+}
+
+func (p *EncryptionWithColumnKey) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+	_, size, err := iprot.ReadListBegin(ctx)
+	if err != nil {
+		return thrift.PrependError("error reading list begin: ", err)
+	}
+	tSlice := make([]string, 0, size)
+	p.PathInSchema = tSlice
+	for i := 0; i < size; i++ {
+		var _elem8 string
+		if v, err := iprot.ReadString(ctx); err != nil {
+			return thrift.PrependError("error reading field 0: ", err)
+		} else {
+			_elem8 = v
+		}
+		p.PathInSchema = append(p.PathInSchema, _elem8)
+	}
+	if err := iprot.ReadListEnd(ctx); err != nil {
+		return thrift.PrependError("error reading list end: ", err)
+	}
+	return nil
+}
+
+func (p *EncryptionWithColumnKey) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadBinary(ctx); err != nil {
+		return thrift.PrependError("error reading field 2: ", err)
+	} else {
+		p.KeyMetadata = v
+	}
+	return nil
 }

 func (p *EncryptionWithColumnKey) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if err := oprot.WriteStructBegin(ctx, "EncryptionWithColumnKey"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-    if err := p.writeField1(ctx, oprot); err != nil { return err }
-    if err := p.writeField2(ctx, oprot); err != nil { return err }
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if err := oprot.WriteStructBegin(ctx, "EncryptionWithColumnKey"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+		if err := p.writeField1(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField2(ctx, oprot); err != nil {
+			return err
+		}
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }

 func (p *EncryptionWithColumnKey) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if err := oprot.WriteFieldBegin(ctx, "path_in_schema", thrift.LIST, 1); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:path_in_schema: ", p), err) }
-  if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.PathInSchema)); err != nil {
-    return thrift.PrependError("error writing list begin: ", err)
-  }
-  for _, v := range p.PathInSchema {
-    if err := oprot.WriteString(ctx, string(v)); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) }
-  }
-  if err := oprot.WriteListEnd(ctx); err != nil {
-    return thrift.PrependError("error writing list end: ", err)
-  }
-  if err := oprot.WriteFieldEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:path_in_schema: ", p), err) }
-  return err
+	if err := oprot.WriteFieldBegin(ctx, "path_in_schema", thrift.LIST, 1); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:path_in_schema: ", p), err)
+	}
+	if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.PathInSchema)); err != nil {
+		return thrift.PrependError("error writing list begin: ", err)
+	}
+	for _, v := range p.PathInSchema {
+		if err := oprot.WriteString(ctx, string(v)); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err)
+		}
+	}
+	if err := oprot.WriteListEnd(ctx); err != nil {
+		return thrift.PrependError("error writing list end: ", err)
+	}
+	if err := oprot.WriteFieldEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field end error 1:path_in_schema: ", p), err)
+	}
+	return err
 }

 func (p *EncryptionWithColumnKey) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetKeyMetadata() {
-    if err := oprot.WriteFieldBegin(ctx, "key_metadata", thrift.STRING, 2); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:key_metadata: ", p), err) }
-    if err := oprot.WriteBinary(ctx, p.KeyMetadata); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T.key_metadata (2) field write error: ", p), err) }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:key_metadata: ", p), err) }
-  }
-  return err
+	if p.IsSetKeyMetadata() {
+		if err := oprot.WriteFieldBegin(ctx, "key_metadata", thrift.STRING, 2); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:key_metadata: ", p), err)
+		}
+		if err := oprot.WriteBinary(ctx, p.KeyMetadata); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T.key_metadata (2) field write error: ", p), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 2:key_metadata: ", p), err)
+		}
+	}
+	return err
 }

 func (p *EncryptionWithColumnKey) Equals(other *EncryptionWithColumnKey) bool {
-  if p == other {
-    return true
-  } else if p == nil || other == nil {
-    return false
-  }
-  if len(p.PathInSchema) != len(other.PathInSchema) { return false }
-  for i, _tgt := range p.PathInSchema {
-    _src9 := other.PathInSchema[i]
-    if _tgt != _src9 { return false }
-  }
-  if bytes.Compare(p.KeyMetadata, other.KeyMetadata) != 0 { return false }
-  return true
+	if p == other {
+		return true
+	} else if p == nil || other == nil {
+		return false
+	}
+	if len(p.PathInSchema) != len(other.PathInSchema) {
+		return false
+	}
+	for i, _tgt := range p.PathInSchema {
+		_src9 := other.PathInSchema[i]
+		if _tgt != _src9 {
+			return false
+		}
+	}
+	if bytes.Compare(p.KeyMetadata, other.KeyMetadata) != 0 {
+		return false
+	}
+	return true
 }

 func (p *EncryptionWithColumnKey) String() string {
-  if p == nil {
-    return "<nil>"
-  }
-  return fmt.Sprintf("EncryptionWithColumnKey(%+v)", *p)
+	if p == nil {
+		return "<nil>"
+	}
+	return fmt.Sprintf("EncryptionWithColumnKey(%+v)", *p)
 }

 func (p *EncryptionWithColumnKey) Validate() error {
-  return nil
+	return nil
 }
+
 // Attributes:
-// - ENCRYPTION_WITH_FOOTER_KEY
-// - ENCRYPTION_WITH_COLUMN_KEY
+//   - ENCRYPTION_WITH_FOOTER_KEY
+//   - ENCRYPTION_WITH_COLUMN_KEY
 type ColumnCryptoMetaData struct {
-  ENCRYPTION_WITH_FOOTER_KEY *EncryptionWithFooterKey `thrift:"ENCRYPTION_WITH_FOOTER_KEY,1" db:"ENCRYPTION_WITH_FOOTER_KEY" json:"ENCRYPTION_WITH_FOOTER_KEY,omitempty"`
-  ENCRYPTION_WITH_COLUMN_KEY *EncryptionWithColumnKey `thrift:"ENCRYPTION_WITH_COLUMN_KEY,2" db:"ENCRYPTION_WITH_COLUMN_KEY" json:"ENCRYPTION_WITH_COLUMN_KEY,omitempty"`
+	ENCRYPTION_WITH_FOOTER_KEY *EncryptionWithFooterKey `thrift:"ENCRYPTION_WITH_FOOTER_KEY,1" db:"ENCRYPTION_WITH_FOOTER_KEY" json:"ENCRYPTION_WITH_FOOTER_KEY,omitempty"`
+	ENCRYPTION_WITH_COLUMN_KEY *EncryptionWithColumnKey `thrift:"ENCRYPTION_WITH_COLUMN_KEY,2" db:"ENCRYPTION_WITH_COLUMN_KEY" json:"ENCRYPTION_WITH_COLUMN_KEY,omitempty"`
 }

 func NewColumnCryptoMetaData() *ColumnCryptoMetaData {
-  return &ColumnCryptoMetaData{}
+	return &ColumnCryptoMetaData{}
 }

 var ColumnCryptoMetaData_ENCRYPTION_WITH_FOOTER_KEY_DEFAULT *EncryptionWithFooterKey
+
 func (p *ColumnCryptoMetaData) GetENCRYPTION_WITH_FOOTER_KEY() *EncryptionWithFooterKey {
-  if !p.IsSetENCRYPTION_WITH_FOOTER_KEY() {
-    return ColumnCryptoMetaData_ENCRYPTION_WITH_FOOTER_KEY_DEFAULT
-  }
-return p.ENCRYPTION_WITH_FOOTER_KEY
+	if !p.IsSetENCRYPTION_WITH_FOOTER_KEY() {
+		return ColumnCryptoMetaData_ENCRYPTION_WITH_FOOTER_KEY_DEFAULT
+	}
+	return p.ENCRYPTION_WITH_FOOTER_KEY
 }
+
 var ColumnCryptoMetaData_ENCRYPTION_WITH_COLUMN_KEY_DEFAULT *EncryptionWithColumnKey
+
 func (p *ColumnCryptoMetaData) GetENCRYPTION_WITH_COLUMN_KEY() *EncryptionWithColumnKey {
-  if !p.IsSetENCRYPTION_WITH_COLUMN_KEY() {
-    return ColumnCryptoMetaData_ENCRYPTION_WITH_COLUMN_KEY_DEFAULT
-  }
-return p.ENCRYPTION_WITH_COLUMN_KEY
+	if !p.IsSetENCRYPTION_WITH_COLUMN_KEY() {
+		return ColumnCryptoMetaData_ENCRYPTION_WITH_COLUMN_KEY_DEFAULT
+	}
+	return p.ENCRYPTION_WITH_COLUMN_KEY
 }

 func (p *ColumnCryptoMetaData) CountSetFieldsColumnCryptoMetaData() int {
-  count := 0
-  if (p.IsSetENCRYPTION_WITH_FOOTER_KEY()) {
-    count++
-  }
-  if (p.IsSetENCRYPTION_WITH_COLUMN_KEY()) {
-    count++
-  }
-  return count
+	count := 0
+	if p.IsSetENCRYPTION_WITH_FOOTER_KEY() {
+		count++
+	}
+	if p.IsSetENCRYPTION_WITH_COLUMN_KEY() {
+		count++
+	}
+	return count
 }

 func (p *ColumnCryptoMetaData) IsSetENCRYPTION_WITH_FOOTER_KEY() bool {
-  return p.ENCRYPTION_WITH_FOOTER_KEY != nil
+	return p.ENCRYPTION_WITH_FOOTER_KEY != nil
 }

 func (p *ColumnCryptoMetaData) IsSetENCRYPTION_WITH_COLUMN_KEY() bool {
-  return p.ENCRYPTION_WITH_COLUMN_KEY != nil
+	return p.ENCRYPTION_WITH_COLUMN_KEY != nil
 }

 func (p *ColumnCryptoMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error {
-  if _, err := iprot.ReadStructBegin(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
-  }
-
-
-  for {
-    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
-    if err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
-    }
-    if fieldTypeId == thrift.STOP { break; }
-    switch fieldId {
-    case 1:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField1(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 2:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField2(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    default:
-      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-        return err
-      }
-    }
-    if err := iprot.ReadFieldEnd(ctx); err != nil {
-      return err
-    }
-  }
-  if err := iprot.ReadStructEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
-  }
-  return nil
-}
-
-func (p *ColumnCryptoMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
-  p.ENCRYPTION_WITH_FOOTER_KEY = &EncryptionWithFooterKey{}
-  if err := p.ENCRYPTION_WITH_FOOTER_KEY.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENCRYPTION_WITH_FOOTER_KEY), err)
-  }
-  return nil
-}
-
-func (p *ColumnCryptoMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
-  p.ENCRYPTION_WITH_COLUMN_KEY = &EncryptionWithColumnKey{}
-  if err := p.ENCRYPTION_WITH_COLUMN_KEY.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENCRYPTION_WITH_COLUMN_KEY), err)
-  }
-  return nil
+	if _, err := iprot.ReadStructBegin(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+	}
+
+	for {
+		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+		if err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+		}
+		if fieldTypeId == thrift.STOP {
+			break
+		}
+		switch fieldId {
+		case 1:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField1(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 2:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField2(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		default:
+			if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+				return err
+			}
+		}
+		if err := iprot.ReadFieldEnd(ctx); err != nil {
+			return err
+		}
+	}
+	if err := iprot.ReadStructEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+	}
+	return nil
+}
+
+func (p *ColumnCryptoMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+	p.ENCRYPTION_WITH_FOOTER_KEY = &EncryptionWithFooterKey{}
+	if err := p.ENCRYPTION_WITH_FOOTER_KEY.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENCRYPTION_WITH_FOOTER_KEY), err)
+	}
+	return nil
+}
+
+func (p *ColumnCryptoMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+	p.ENCRYPTION_WITH_COLUMN_KEY = &EncryptionWithColumnKey{}
+	if err := p.ENCRYPTION_WITH_COLUMN_KEY.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENCRYPTION_WITH_COLUMN_KEY), err)
+	}
+	return nil
 }

 func (p *ColumnCryptoMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if c := p.CountSetFieldsColumnCryptoMetaData(); c != 1 {
-    return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c)
-  }
-  if err := oprot.WriteStructBegin(ctx, "ColumnCryptoMetaData"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-    if err := p.writeField1(ctx, oprot); err != nil { return err }
-    if err := p.writeField2(ctx, oprot); err != nil { return err }
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if c := p.CountSetFieldsColumnCryptoMetaData(); c != 1 {
+		return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c)
+	}
+	if err := oprot.WriteStructBegin(ctx, "ColumnCryptoMetaData"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+		if err := p.writeField1(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField2(ctx, oprot); err != nil {
+			return err
+		}
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }

 func (p *ColumnCryptoMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetENCRYPTION_WITH_FOOTER_KEY() {
-    if err := oprot.WriteFieldBegin(ctx, "ENCRYPTION_WITH_FOOTER_KEY", thrift.STRUCT, 1); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:ENCRYPTION_WITH_FOOTER_KEY: ", p), err) }
-    if err := p.ENCRYPTION_WITH_FOOTER_KEY.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENCRYPTION_WITH_FOOTER_KEY), err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:ENCRYPTION_WITH_FOOTER_KEY: ", p), err) }
-  }
-  return err
+	if p.IsSetENCRYPTION_WITH_FOOTER_KEY() {
+		if err := oprot.WriteFieldBegin(ctx, "ENCRYPTION_WITH_FOOTER_KEY", thrift.STRUCT, 1); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:ENCRYPTION_WITH_FOOTER_KEY: ", p), err)
+		}
+		if err := p.ENCRYPTION_WITH_FOOTER_KEY.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENCRYPTION_WITH_FOOTER_KEY), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 1:ENCRYPTION_WITH_FOOTER_KEY: ", p), err)
+		}
+	}
+	return err
 }

 func (p *ColumnCryptoMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetENCRYPTION_WITH_COLUMN_KEY() {
-    if err := oprot.WriteFieldBegin(ctx, "ENCRYPTION_WITH_COLUMN_KEY", thrift.STRUCT, 2); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:ENCRYPTION_WITH_COLUMN_KEY: ", p), err) }
-    if err := p.ENCRYPTION_WITH_COLUMN_KEY.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENCRYPTION_WITH_COLUMN_KEY), err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:ENCRYPTION_WITH_COLUMN_KEY: ", p), err) }
-  }
-  return err
+	if p.IsSetENCRYPTION_WITH_COLUMN_KEY() {
+		if err := oprot.WriteFieldBegin(ctx, "ENCRYPTION_WITH_COLUMN_KEY", thrift.STRUCT, 2); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:ENCRYPTION_WITH_COLUMN_KEY: ", p), err)
+		}
+		if err := p.ENCRYPTION_WITH_COLUMN_KEY.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENCRYPTION_WITH_COLUMN_KEY), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 2:ENCRYPTION_WITH_COLUMN_KEY: ", p), err)
+		}
+	}
+	return err
 }

 func (p *ColumnCryptoMetaData) Equals(other *ColumnCryptoMetaData) bool {
-  if p == other {
-    return true
-  } else if p == nil || other == nil {
-    return false
-  }
-  if !p.ENCRYPTION_WITH_FOOTER_KEY.Equals(other.ENCRYPTION_WITH_FOOTER_KEY) { return false }
-  if !p.ENCRYPTION_WITH_COLUMN_KEY.Equals(other.ENCRYPTION_WITH_COLUMN_KEY) { return false }
-  return true
+	if p == other {
+		return true
+	} else if p == nil || other == nil {
+		return false
+	}
+	if !p.ENCRYPTION_WITH_FOOTER_KEY.Equals(other.ENCRYPTION_WITH_FOOTER_KEY) {
+		return false
+	}
+	if !p.ENCRYPTION_WITH_COLUMN_KEY.Equals(other.ENCRYPTION_WITH_COLUMN_KEY) {
+		return false
+	}
+	return true
 }

 func (p *ColumnCryptoMetaData) String() string {
-  if p == nil {
-    return "<nil>"
-  }
-  return fmt.Sprintf("ColumnCryptoMetaData(%+v)", *p)
+	if p == nil {
+		return "<nil>"
+	}
+	return fmt.Sprintf("ColumnCryptoMetaData(%+v)", *p)
 }

 func (p *ColumnCryptoMetaData) Validate() error {
-  return nil
+	return nil
 }
+
 // Attributes:
-// - FilePath: File where column data is stored. If not set, assumed to be same file as
+//   - FilePath: File where column data is stored. If not set, assumed to be same file as
+//
 // metadata. This path is relative to the current file.
-//
-// - FileOffset: Byte offset in file_path to the ColumnMetaData *
-// - MetaData: Column metadata for this chunk. This is the same content as what is at
+//
+//   - FileOffset: Byte offset in file_path to the ColumnMetaData *
+//   - MetaData: Column metadata for this chunk. This is the same content as what is at
+//
 // file_path/file_offset. Having it here has it replicated in the file
 // metadata.
-//
-// - OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex *
-// - OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes *
-// - ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex *
-// - ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes *
-// - CryptoMetadata: Crypto metadata of encrypted columns *
-// - EncryptedColumnMetadata: Encrypted column metadata for this chunk *
+//
+//   - OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex *
+//   - OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes *
+//   - ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex *
+//   - ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes *
+//   - CryptoMetadata: Crypto metadata of encrypted columns *
+//   - EncryptedColumnMetadata: Encrypted column metadata for this chunk *
 type ColumnChunk struct {
-  FilePath *string `thrift:"file_path,1" db:"file_path" json:"file_path,omitempty"`
-  FileOffset int64 `thrift:"file_offset,2,required" db:"file_offset" json:"file_offset"`
-  MetaData *ColumnMetaData `thrift:"meta_data,3" db:"meta_data" json:"meta_data,omitempty"`
-  OffsetIndexOffset *int64 `thrift:"offset_index_offset,4" db:"offset_index_offset" json:"offset_index_offset,omitempty"`
-  OffsetIndexLength *int32 `thrift:"offset_index_length,5" db:"offset_index_length" json:"offset_index_length,omitempty"`
-  ColumnIndexOffset *int64 `thrift:"column_index_offset,6" db:"column_index_offset" json:"column_index_offset,omitempty"`
-  ColumnIndexLength *int32 `thrift:"column_index_length,7" db:"column_index_length" json:"column_index_length,omitempty"`
-  CryptoMetadata *ColumnCryptoMetaData `thrift:"crypto_metadata,8" db:"crypto_metadata" json:"crypto_metadata,omitempty"`
-  EncryptedColumnMetadata []byte `thrift:"encrypted_column_metadata,9" db:"encrypted_column_metadata" json:"encrypted_column_metadata,omitempty"`
+	FilePath *string `thrift:"file_path,1" db:"file_path" json:"file_path,omitempty"`
+	FileOffset int64 `thrift:"file_offset,2,required" db:"file_offset" json:"file_offset"`
+	MetaData *ColumnMetaData `thrift:"meta_data,3" db:"meta_data" json:"meta_data,omitempty"`
+	OffsetIndexOffset *int64 `thrift:"offset_index_offset,4" db:"offset_index_offset" json:"offset_index_offset,omitempty"`
+	OffsetIndexLength *int32 `thrift:"offset_index_length,5" db:"offset_index_length" json:"offset_index_length,omitempty"`
+	ColumnIndexOffset *int64 `thrift:"column_index_offset,6" db:"column_index_offset" json:"column_index_offset,omitempty"`
+	ColumnIndexLength *int32 `thrift:"column_index_length,7" db:"column_index_length" json:"column_index_length,omitempty"`
+	CryptoMetadata *ColumnCryptoMetaData `thrift:"crypto_metadata,8" db:"crypto_metadata" json:"crypto_metadata,omitempty"`
+	EncryptedColumnMetadata []byte `thrift:"encrypted_column_metadata,9" db:"encrypted_column_metadata" json:"encrypted_column_metadata,omitempty"`
 }

 func NewColumnChunk() *ColumnChunk {
-  return &ColumnChunk{}
+	return &ColumnChunk{}
 }

 var ColumnChunk_FilePath_DEFAULT string
+
 func (p *ColumnChunk) GetFilePath() string {
-  if !p.IsSetFilePath() {
-    return ColumnChunk_FilePath_DEFAULT
-  }
-return *p.FilePath
+	if !p.IsSetFilePath() {
+		return ColumnChunk_FilePath_DEFAULT
+	}
+	return *p.FilePath
 }

 func (p *ColumnChunk) GetFileOffset() int64 {
-  return p.FileOffset
+	return p.FileOffset
 }
+
 var ColumnChunk_MetaData_DEFAULT *ColumnMetaData
+
 func (p *ColumnChunk) GetMetaData() *ColumnMetaData {
-  if !p.IsSetMetaData() {
-    return ColumnChunk_MetaData_DEFAULT
-  }
-return p.MetaData
+	if !p.IsSetMetaData() {
+		return ColumnChunk_MetaData_DEFAULT
+	}
+	return p.MetaData
 }
+
 var ColumnChunk_OffsetIndexOffset_DEFAULT int64
+
 func (p *ColumnChunk) GetOffsetIndexOffset() int64 {
-  if !p.IsSetOffsetIndexOffset() {
-    return ColumnChunk_OffsetIndexOffset_DEFAULT
-  }
-return *p.OffsetIndexOffset
+	if !p.IsSetOffsetIndexOffset() {
+		return ColumnChunk_OffsetIndexOffset_DEFAULT
+	}
+	return *p.OffsetIndexOffset
 }
+
 var ColumnChunk_OffsetIndexLength_DEFAULT int32
+
 func (p *ColumnChunk) GetOffsetIndexLength() int32 {
-  if !p.IsSetOffsetIndexLength() {
-    return ColumnChunk_OffsetIndexLength_DEFAULT
-  }
-return *p.OffsetIndexLength
+	if !p.IsSetOffsetIndexLength() {
+		return ColumnChunk_OffsetIndexLength_DEFAULT
+	}
+	return *p.OffsetIndexLength
 }
+
 var ColumnChunk_ColumnIndexOffset_DEFAULT int64
+
 func (p *ColumnChunk) GetColumnIndexOffset() int64 {
-  if !p.IsSetColumnIndexOffset() {
-    return ColumnChunk_ColumnIndexOffset_DEFAULT
-  }
-return *p.ColumnIndexOffset
+	if !p.IsSetColumnIndexOffset() {
+		return ColumnChunk_ColumnIndexOffset_DEFAULT
+	}
+	return *p.ColumnIndexOffset
 }
+
 var ColumnChunk_ColumnIndexLength_DEFAULT int32
+
 func (p *ColumnChunk) GetColumnIndexLength() int32 {
-  if !p.IsSetColumnIndexLength() {
-    return ColumnChunk_ColumnIndexLength_DEFAULT
-  }
-return *p.ColumnIndexLength
+	if !p.IsSetColumnIndexLength() {
+		return ColumnChunk_ColumnIndexLength_DEFAULT
+	}
+	return *p.ColumnIndexLength
 }
+
 var ColumnChunk_CryptoMetadata_DEFAULT *ColumnCryptoMetaData
+
 func (p *ColumnChunk) GetCryptoMetadata() *ColumnCryptoMetaData {
-  if !p.IsSetCryptoMetadata() {
-    return ColumnChunk_CryptoMetadata_DEFAULT
-  }
-return p.CryptoMetadata
+	if !p.IsSetCryptoMetadata() {
+		return ColumnChunk_CryptoMetadata_DEFAULT
+	}
+	return p.CryptoMetadata
 }
+
 var ColumnChunk_EncryptedColumnMetadata_DEFAULT []byte

 func (p *ColumnChunk) GetEncryptedColumnMetadata() []byte {
-  return p.EncryptedColumnMetadata
+	return p.EncryptedColumnMetadata
 }

 func (p *ColumnChunk) IsSetFilePath() bool {
-  return p.FilePath != nil
+	return p.FilePath != nil
 }

 func (p *ColumnChunk) IsSetMetaData() bool {
-  return p.MetaData != nil
+	return p.MetaData != nil
 }

 func (p *ColumnChunk) IsSetOffsetIndexOffset() bool {
-  return p.OffsetIndexOffset != nil
+	return p.OffsetIndexOffset != nil
 }

 func (p *ColumnChunk) IsSetOffsetIndexLength() bool {
-  return p.OffsetIndexLength != nil
+	return p.OffsetIndexLength != nil
 }

 func (p *ColumnChunk) IsSetColumnIndexOffset() bool {
-  return p.ColumnIndexOffset != nil
+	return p.ColumnIndexOffset != nil
 }

 func (p *ColumnChunk) IsSetColumnIndexLength() bool {
-  return p.ColumnIndexLength != nil
+	return p.ColumnIndexLength != nil
 }

 func (p *ColumnChunk) IsSetCryptoMetadata() bool {
-  return p.CryptoMetadata != nil
+	return p.CryptoMetadata != nil
 }

 func (p *ColumnChunk) IsSetEncryptedColumnMetadata() bool {
-  return p.EncryptedColumnMetadata != nil
+	return p.EncryptedColumnMetadata != nil
 }

 func (p *ColumnChunk) Read(ctx context.Context, iprot thrift.TProtocol) error {
-  if _, err := iprot.ReadStructBegin(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
-  }
-
-  var issetFileOffset bool = false;
-
-  for {
-    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
-    if err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
-    }
-    if fieldTypeId == thrift.STOP { break; }
-    switch fieldId {
-    case 1:
-      if fieldTypeId == thrift.STRING {
-        if err := p.ReadField1(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 2:
-      if fieldTypeId == thrift.I64 {
-        if err := p.ReadField2(ctx, iprot); err != nil {
-          return err
-        }
-        issetFileOffset = true
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 3:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField3(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 4:
-      if fieldTypeId == thrift.I64 {
-        if err := p.ReadField4(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 5:
-      if fieldTypeId == thrift.I32 {
-        if err := p.ReadField5(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 6:
-      if fieldTypeId == thrift.I64 {
-        if err := p.ReadField6(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 7:
-      if fieldTypeId == thrift.I32 {
-        if err := p.ReadField7(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 8:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField8(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 9:
-      if fieldTypeId == thrift.STRING {
-        if err := p.ReadField9(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    default:
-      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-        return err
-      }
-    }
-    if err := iprot.ReadFieldEnd(ctx); err != nil {
-      return err
-    }
-  }
-  if err := iprot.ReadStructEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
-  }
-  if !issetFileOffset{
-    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FileOffset is not set"));
-  }
-  return nil
-}
-
-func (p *ColumnChunk) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadString(ctx); err != nil {
-  return thrift.PrependError("error reading field 1: ", err)
-} else {
-  p.FilePath = &v
-}
-  return nil
-}
-
-func (p *ColumnChunk) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadI64(ctx); err != nil {
-  return thrift.PrependError("error reading field 2: ", err)
-} else {
-  p.FileOffset = v
-}
-  return nil
-}
-
-func (p *ColumnChunk) ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
-  p.MetaData = &ColumnMetaData{}
-  if err := p.MetaData.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MetaData), err)
-  }
-  return nil
-}
-
-func (p *ColumnChunk) ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadI64(ctx); err != nil {
-  return thrift.PrependError("error reading field 4: ", err)
-} else {
-  p.OffsetIndexOffset = &v
-}
-  return nil
-}
-
-func (p *ColumnChunk) ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadI32(ctx); err != nil {
-  return thrift.PrependError("error reading field 5: ", err)
-} else {
-  p.OffsetIndexLength = &v
-}
-  return nil
-}
-
-func (p *ColumnChunk) ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadI64(ctx); err != nil {
-  return thrift.PrependError("error reading field 6: ", err)
-} else {
-  p.ColumnIndexOffset = &v
-}
-  return nil
-}
-
-func (p *ColumnChunk) ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadI32(ctx); err != nil {
-  return thrift.PrependError("error reading field 7: ", err)
-} else {
-  p.ColumnIndexLength = &v
-}
-  return nil
-}
-
-func (p *ColumnChunk) ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
-  p.CryptoMetadata = &ColumnCryptoMetaData{}
-  if err := p.CryptoMetadata.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.CryptoMetadata), err)
-  }
-  return nil
-}
-
-func (p *ColumnChunk) ReadField9(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadBinary(ctx); err != nil {
-  return thrift.PrependError("error reading field 9: ", err)
-} else {
-  p.EncryptedColumnMetadata = v
-}
-  return nil
+	if _, err := iprot.ReadStructBegin(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+	}
+
+	var issetFileOffset bool = false
+
+	for {
+		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+		if err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+		}
+		if fieldTypeId == thrift.STOP {
+			break
+		}
+		switch fieldId {
+		case 1:
+			if fieldTypeId == thrift.STRING {
+				if err := p.ReadField1(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 2:
+			if fieldTypeId == thrift.I64 {
+				if err := p.ReadField2(ctx, iprot); err != nil {
+					return err
+				}
+				issetFileOffset = true
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 3:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField3(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 4:
+			if fieldTypeId == thrift.I64 {
+				if err := p.ReadField4(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 5:
+			if fieldTypeId == thrift.I32 {
+				if err := p.ReadField5(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 6:
+			if fieldTypeId == thrift.I64 {
+				if err := p.ReadField6(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 7:
+			if fieldTypeId == thrift.I32 {
+				if err := p.ReadField7(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 8:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField8(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 9:
+			if fieldTypeId == thrift.STRING {
+				if err := p.ReadField9(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		default:
+			if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+				return err
+			}
+		}
+		if err := iprot.ReadFieldEnd(ctx); err != nil {
+			return err
+		}
+	}
+	if err := iprot.ReadStructEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+	}
+	if !issetFileOffset {
+		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FileOffset is not set"))
+	}
+	return nil
+}
+
+func (p *ColumnChunk) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadString(ctx); err != nil {
+		return thrift.PrependError("error reading field 1: ", err)
+	} else {
+		p.FilePath = &v
+	}
+	return nil
+}
+
+func (p *ColumnChunk) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadI64(ctx); err != nil {
+		return thrift.PrependError("error reading field 2: ", err)
+	} else {
+		p.FileOffset = v
+	}
+	return nil
+}
+
+func (p *ColumnChunk) ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+	p.MetaData = &ColumnMetaData{}
+	if err := p.MetaData.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MetaData), err)
+	}
+	return nil
+}
+
+func (p *ColumnChunk) ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadI64(ctx); err != nil {
+		return thrift.PrependError("error reading field 4: ", err)
+	} else {
+		p.OffsetIndexOffset = &v
+	}
+	return nil
+}
+
+func (p *ColumnChunk) ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadI32(ctx); err != nil {
+		return thrift.PrependError("error reading field 5: ", err)
+	} else {
+		p.OffsetIndexLength = &v
+	}
+	return nil
+}
+
+func (p *ColumnChunk) ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadI64(ctx); err != nil {
+		return thrift.PrependError("error reading field 6: ", err)
+	} else {
+		p.ColumnIndexOffset = &v
+	}
+	return nil
+}
+
+func (p *ColumnChunk) ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadI32(ctx); err != nil {
+		return thrift.PrependError("error reading field 7: ", err)
+	} else {
+		p.ColumnIndexLength = &v
+	}
+	return nil
+}
+
+func (p *ColumnChunk) ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
+	p.CryptoMetadata = &ColumnCryptoMetaData{}
+	if err := p.CryptoMetadata.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.CryptoMetadata), err)
+	}
+	return nil
+}
+
+func (p *ColumnChunk) ReadField9(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadBinary(ctx); err != nil {
+		return thrift.PrependError("error reading field 9: ", err)
+	} else {
+		p.EncryptedColumnMetadata = v
+	}
+	return nil
 }

 func (p *ColumnChunk) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if err := oprot.WriteStructBegin(ctx, "ColumnChunk"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-    if err := p.writeField1(ctx, oprot); err != nil { return err }
-    if err := p.writeField2(ctx, oprot); err != nil { return err }
-    if err := p.writeField3(ctx, oprot); err != nil { return err }
-    if err := p.writeField4(ctx, oprot); err != nil { return err }
-    if err := p.writeField5(ctx, oprot); err != nil { return err }
-    if err := p.writeField6(ctx, oprot); err != nil { return err }
-    if err := p.writeField7(ctx, oprot); err != nil { return err }
-    if err := p.writeField8(ctx, oprot); err != nil { return err }
-    if err := p.writeField9(ctx, oprot); err != nil { return err }
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if err := oprot.WriteStructBegin(ctx, "ColumnChunk"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+		if err := p.writeField1(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField2(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField3(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField4(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField5(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField6(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField7(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField8(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField9(ctx, oprot); err != nil {
+			return err
+		}
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }

 func (p *ColumnChunk) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetFilePath() {
-    if err := oprot.WriteFieldBegin(ctx, "file_path", thrift.STRING, 1); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:file_path: ", p), err) }
-    if err := oprot.WriteString(ctx, string(*p.FilePath)); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T.file_path (1) field write error: ", p), err) }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:file_path: ", p), err) }
-  }
-  return err
+	if p.IsSetFilePath() {
+		if err := oprot.WriteFieldBegin(ctx, "file_path", thrift.STRING, 1); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:file_path: ", p), err)
+		}
+		if err := oprot.WriteString(ctx, string(*p.FilePath)); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T.file_path (1) field write error: ", p), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 1:file_path: ", p), err)
+		}
+	}
+	return err
 }

 func (p *ColumnChunk) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if err := oprot.WriteFieldBegin(ctx, "file_offset", thrift.I64, 2); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:file_offset: ", p), err) }
-  if err := oprot.WriteI64(ctx, int64(p.FileOffset)); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T.file_offset (2) field write error: ", p), err) }
-  if err := oprot.WriteFieldEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:file_offset: ", p), err) }
-  return err
+	if err := oprot.WriteFieldBegin(ctx, "file_offset", thrift.I64, 2); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:file_offset: ", p), err)
+	}
+	if err := oprot.WriteI64(ctx, int64(p.FileOffset)); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T.file_offset (2) field write error: ", p), err)
+	}
+	if err := oprot.WriteFieldEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field end error 2:file_offset: ", p), err)
+	}
+	return err
 }

 func (p *ColumnChunk) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetMetaData() {
-    if err := oprot.WriteFieldBegin(ctx, "meta_data", thrift.STRUCT, 3); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:meta_data: ", p), err) }
-    if err := p.MetaData.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MetaData), err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 3:meta_data: ", p), err) }
-  }
-  return err
+	if p.IsSetMetaData() {
+		if err := oprot.WriteFieldBegin(ctx, "meta_data", thrift.STRUCT, 3); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:meta_data: ", p), err)
+		}
+		if err := p.MetaData.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MetaData), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 3:meta_data: ", p), err)
+		}
+	}
+	return err
 }

 func (p *ColumnChunk) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetOffsetIndexOffset() {
-    if err := oprot.WriteFieldBegin(ctx, "offset_index_offset", thrift.I64, 4); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:offset_index_offset: ", p), err) }
-    if err := oprot.WriteI64(ctx, int64(*p.OffsetIndexOffset)); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T.offset_index_offset (4) field write error: ", p), err) }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 4:offset_index_offset: ", p), err) }
-  }
-  return err
+	if p.IsSetOffsetIndexOffset() {
+		if err := oprot.WriteFieldBegin(ctx, "offset_index_offset", thrift.I64, 4); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:offset_index_offset: ", p), err)
+		}
+		if err := oprot.WriteI64(ctx, int64(*p.OffsetIndexOffset)); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T.offset_index_offset (4) field write error: ", p), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 4:offset_index_offset: ", p), err)
+		}
+	}
+	return err
 }

 func (p *ColumnChunk) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetOffsetIndexLength() {
-    if err := oprot.WriteFieldBegin(ctx, "offset_index_length", thrift.I32, 5); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:offset_index_length: ", p), err) }
-    if err := oprot.WriteI32(ctx, int32(*p.OffsetIndexLength)); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T.offset_index_length (5) field write error: ", p), err) }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:offset_index_length: ", p), err) }
-  }
-  return err
+	if p.IsSetOffsetIndexLength() {
+		if err := oprot.WriteFieldBegin(ctx, "offset_index_length", thrift.I32, 5); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:offset_index_length: ", p), err)
+		}
+		if err := oprot.WriteI32(ctx, int32(*p.OffsetIndexLength)); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T.offset_index_length (5) field write error: ", p), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 5:offset_index_length: ", p), err)
+		}
+	}
+	return err
 }

 func (p *ColumnChunk) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetColumnIndexOffset() {
-    if err := oprot.WriteFieldBegin(ctx, "column_index_offset", thrift.I64, 6); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:column_index_offset: ", p), err) }
-    if err := oprot.WriteI64(ctx, int64(*p.ColumnIndexOffset)); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T.column_index_offset (6) field write error: ", p), err) }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 6:column_index_offset: ", p), err) }
-  }
-  return err
+	if p.IsSetColumnIndexOffset() {
+		if err := oprot.WriteFieldBegin(ctx, "column_index_offset", thrift.I64, 6); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:column_index_offset: ", p), err)
+		}
+		if err := oprot.WriteI64(ctx, int64(*p.ColumnIndexOffset)); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T.column_index_offset (6) field write error: ", p), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 6:column_index_offset: ", p), err)
+		}
+	}
+	return err
 }

 func (p *ColumnChunk) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetColumnIndexLength() {
-    if err := oprot.WriteFieldBegin(ctx, "column_index_length", thrift.I32, 7); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_index_length: ", p), err) }
-    if err := oprot.WriteI32(ctx, int32(*p.ColumnIndexLength)); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T.column_index_length (7) field write error: ", p), err) }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_index_length: ", p), err) }
-  }
-  return err
+	if p.IsSetColumnIndexLength() {
+		if err := oprot.WriteFieldBegin(ctx, "column_index_length", thrift.I32, 7); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_index_length: ", p), err)
+		}
+		if err := oprot.WriteI32(ctx, int32(*p.ColumnIndexLength)); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T.column_index_length (7) field write error: ", p), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_index_length: ", p), err)
+		}
+	}
+	return err
 }

 func (p *ColumnChunk) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetCryptoMetadata() {
-    if err := oprot.WriteFieldBegin(ctx, "crypto_metadata", thrift.STRUCT, 8); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:crypto_metadata: ", p), err) }
-    if err := p.CryptoMetadata.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.CryptoMetadata), err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 8:crypto_metadata: ", p), err) }
-  }
-  return err
+	if p.IsSetCryptoMetadata() {
+		if err := oprot.WriteFieldBegin(ctx, "crypto_metadata", thrift.STRUCT, 8); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:crypto_metadata: ", p), err)
+		}
+		if err := p.CryptoMetadata.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.CryptoMetadata), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 8:crypto_metadata: ", p), err)
+		}
+	}
+	return err
 }

 func (p *ColumnChunk) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetEncryptedColumnMetadata() {
-    if err := oprot.WriteFieldBegin(ctx, "encrypted_column_metadata", thrift.STRING, 9); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:encrypted_column_metadata: ", p), err) }
-    if err := oprot.WriteBinary(ctx, p.EncryptedColumnMetadata); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T.encrypted_column_metadata (9) field write error: ", p), err) }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 9:encrypted_column_metadata: ", p), err) }
-  }
-  return err
+	if p.IsSetEncryptedColumnMetadata() {
+		if err := oprot.WriteFieldBegin(ctx, "encrypted_column_metadata", thrift.STRING, 9); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:encrypted_column_metadata: ", p), err)
+		}
+		if err := oprot.WriteBinary(ctx, p.EncryptedColumnMetadata); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T.encrypted_column_metadata (9) field write error: ", p), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 9:encrypted_column_metadata: ", p), err)
+		}
+	}
+	return err
 }

 func (p *ColumnChunk) Equals(other *ColumnChunk) bool {
-  if p == other {
-    return true
-  } else if p == nil || other == nil {
-    return false
-  }
-  if p.FilePath != other.FilePath {
-    if p.FilePath == nil || other.FilePath == nil {
-      return false
-    }
-    if (*p.FilePath) != (*other.FilePath) { return false }
-  }
-  if p.FileOffset != other.FileOffset { return false }
-  if !p.MetaData.Equals(other.MetaData) { return false }
-  if p.OffsetIndexOffset != other.OffsetIndexOffset {
-    if p.OffsetIndexOffset == nil || other.OffsetIndexOffset == nil {
-      return false
-    }
-    if (*p.OffsetIndexOffset) != (*other.OffsetIndexOffset) { return false }
-  }
-  if p.OffsetIndexLength != other.OffsetIndexLength {
-    if p.OffsetIndexLength == nil || other.OffsetIndexLength == nil {
-      return false
-    }
-    if (*p.OffsetIndexLength) != (*other.OffsetIndexLength) { return false }
-  }
-  if p.ColumnIndexOffset != other.ColumnIndexOffset {
-    if p.ColumnIndexOffset == nil || other.ColumnIndexOffset == nil {
-      return false
-    }
-    if (*p.ColumnIndexOffset) != (*other.ColumnIndexOffset) { return false }
-  }
-  if p.ColumnIndexLength != other.ColumnIndexLength {
-    if p.ColumnIndexLength == nil || other.ColumnIndexLength == nil {
-      return false
-    }
-    if (*p.ColumnIndexLength) != (*other.ColumnIndexLength) { return false }
-  }
-  if !p.CryptoMetadata.Equals(other.CryptoMetadata) { return false }
-  if bytes.Compare(p.EncryptedColumnMetadata, other.EncryptedColumnMetadata) != 0 { return false }
-  return true
+	if p == other {
+		return true
+	} else if p == nil || other == nil {
+		return false
+	}
+	if p.FilePath != other.FilePath {
+		if p.FilePath == nil || other.FilePath == nil {
+			return false
+		}
+		if (*p.FilePath) != (*other.FilePath) {
+			return false
+		}
+	}
+	if p.FileOffset != other.FileOffset {
+		return false
+	}
+	if !p.MetaData.Equals(other.MetaData) {
+		return false
+	}
+	if p.OffsetIndexOffset != other.OffsetIndexOffset {
+		if p.OffsetIndexOffset == nil || other.OffsetIndexOffset == nil {
+			return false
+		}
+		if (*p.OffsetIndexOffset) != (*other.OffsetIndexOffset) {
+			return false
+		}
+	}
+	if p.OffsetIndexLength != other.OffsetIndexLength {
+		if p.OffsetIndexLength == nil || other.OffsetIndexLength == nil {
+			return false
+		}
+		if (*p.OffsetIndexLength) != (*other.OffsetIndexLength) {
+			return false
+		}
+	}
+	if p.ColumnIndexOffset != other.ColumnIndexOffset {
+		if p.ColumnIndexOffset == nil || other.ColumnIndexOffset == nil {
+			return false
+		}
+		if (*p.ColumnIndexOffset) != (*other.ColumnIndexOffset) {
+			return false
+		}
+	}
+	if p.ColumnIndexLength != other.ColumnIndexLength {
+		if p.ColumnIndexLength == nil || other.ColumnIndexLength == nil {
+			return false
+		}
+		if (*p.ColumnIndexLength) != (*other.ColumnIndexLength) {
+			return false
+		}
+	}
+	if !p.CryptoMetadata.Equals(other.CryptoMetadata) {
+		return false
+	}
+	if bytes.Compare(p.EncryptedColumnMetadata, other.EncryptedColumnMetadata) != 0 {
+		return false
+	}
+	return true
 }

 func (p *ColumnChunk) String() string {
-  if p == nil {
-    return "<nil>"
-  }
-  return fmt.Sprintf("ColumnChunk(%+v)", *p)
+	if p == nil {
+		return "<nil>"
+	}
+	return fmt.Sprintf("ColumnChunk(%+v)", *p)
 }

 func (p *ColumnChunk) Validate() error {
-  return nil
+	return nil
 }
+
 // Attributes:
-// - Columns: Metadata for each column chunk in this row group.
+//   - Columns: Metadata for each column chunk in this row group.
+//
 // This list must have the same order as the SchemaElement list in FileMetaData.
-//
-// - TotalByteSize: Total byte size of all the uncompressed column data in this row group *
-// - NumRows: Number of rows in this row group *
-// - SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup.
+//
+//   - TotalByteSize: Total byte size of all the uncompressed column data in this row group *
+//   - NumRows: Number of rows in this row group *
+//   - SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup.
+//
 // The sorting columns can be a subset of all the columns.
-// - FileOffset: Byte offset from beginning of file to first page (data or dictionary)
+//   - FileOffset: Byte offset from beginning of file to first page (data or dictionary)
+//
 // in this row group *
-// - TotalCompressedSize: Total byte size of all compressed (and potentially encrypted) column data
+//   - TotalCompressedSize: Total byte size of all compressed (and potentially encrypted) column data
+//
 // in this row group *
-// - Ordinal: Row group ordinal in the file *
+//   - Ordinal: Row group ordinal in the file *
 type RowGroup struct {
-  Columns []*ColumnChunk `thrift:"columns,1,required" db:"columns" json:"columns"`
-  TotalByteSize int64 `thrift:"total_byte_size,2,required" db:"total_byte_size" json:"total_byte_size"`
-  NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
-  SortingColumns []*SortingColumn `thrift:"sorting_columns,4" db:"sorting_columns" json:"sorting_columns,omitempty"`
-  FileOffset *int64 `thrift:"file_offset,5" db:"file_offset" json:"file_offset,omitempty"`
-  TotalCompressedSize *int64 `thrift:"total_compressed_size,6" db:"total_compressed_size" json:"total_compressed_size,omitempty"`
-  Ordinal *int16 `thrift:"ordinal,7" db:"ordinal" json:"ordinal,omitempty"`
+	Columns []*ColumnChunk `thrift:"columns,1,required" db:"columns" json:"columns"`
+	TotalByteSize int64 `thrift:"total_byte_size,2,required" db:"total_byte_size" json:"total_byte_size"`
+	NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
+	SortingColumns []*SortingColumn `thrift:"sorting_columns,4" db:"sorting_columns" json:"sorting_columns,omitempty"`
+	FileOffset *int64 `thrift:"file_offset,5" db:"file_offset" json:"file_offset,omitempty"`
+	TotalCompressedSize *int64 `thrift:"total_compressed_size,6" db:"total_compressed_size" json:"total_compressed_size,omitempty"`
+	Ordinal *int16 `thrift:"ordinal,7" db:"ordinal" json:"ordinal,omitempty"`
 }

 func NewRowGroup() *RowGroup {
-  return &RowGroup{}
+	return &RowGroup{}
 }
-
 func (p *RowGroup) GetColumns() []*ColumnChunk {
-  return p.Columns
+	return p.Columns
 }

 func (p *RowGroup) GetTotalByteSize() int64 {
-  return p.TotalByteSize
+	return p.TotalByteSize
 }

 func (p *RowGroup) GetNumRows() int64 {
-  return p.NumRows
+	return p.NumRows
 }
+
 var RowGroup_SortingColumns_DEFAULT []*SortingColumn

 func (p *RowGroup) GetSortingColumns() []*SortingColumn {
-  return p.SortingColumns
+	return p.SortingColumns
 }
+
 var RowGroup_FileOffset_DEFAULT int64
+
 func (p *RowGroup) GetFileOffset() int64 {
-  if !p.IsSetFileOffset() {
-    return RowGroup_FileOffset_DEFAULT
-  }
-return *p.FileOffset
+	if !p.IsSetFileOffset() {
+		return RowGroup_FileOffset_DEFAULT
+	}
+	return *p.FileOffset
 }
+
 var RowGroup_TotalCompressedSize_DEFAULT int64
+
 func (p *RowGroup) GetTotalCompressedSize() int64 {
-  if !p.IsSetTotalCompressedSize() {
-    return RowGroup_TotalCompressedSize_DEFAULT
-  }
-return *p.TotalCompressedSize
+	if !p.IsSetTotalCompressedSize() {
+		return RowGroup_TotalCompressedSize_DEFAULT
+	}
+	return *p.TotalCompressedSize
 }
+
 var RowGroup_Ordinal_DEFAULT int16
+
 func (p *RowGroup) GetOrdinal() int16 {
-  if !p.IsSetOrdinal() {
-    return RowGroup_Ordinal_DEFAULT
-  }
-return *p.Ordinal
+	if !p.IsSetOrdinal() {
+		return RowGroup_Ordinal_DEFAULT
+	}
+	return *p.Ordinal
 }

 func (p *RowGroup) IsSetSortingColumns() bool {
-  return p.SortingColumns != nil
+	return p.SortingColumns != nil
 }

 func (p *RowGroup) IsSetFileOffset() bool {
-  return p.FileOffset != nil
+	return p.FileOffset != nil
 }

 func (p *RowGroup) IsSetTotalCompressedSize() bool {
-  return p.TotalCompressedSize != nil
+	return p.TotalCompressedSize != nil
 }

 func (p *RowGroup) IsSetOrdinal() bool {
-  return p.Ordinal != nil
+	return p.Ordinal != nil
 }

 func (p *RowGroup) Read(ctx context.Context, iprot thrift.TProtocol) error {
-  if _, err := iprot.ReadStructBegin(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
-  }
-
-  var issetColumns bool = false;
-  var issetTotalByteSize bool = false;
-  var issetNumRows bool = false;
-
-  for {
-    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
-    if err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
-    }
-    if fieldTypeId == thrift.STOP { break; }
-    switch fieldId {
-    case 1:
-      if fieldTypeId == thrift.LIST {
-        if err := p.ReadField1(ctx, iprot); err != nil {
-          return err
-        }
-        issetColumns = true
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 2:
-      if fieldTypeId == thrift.I64 {
-        if err := p.ReadField2(ctx, iprot); err != nil {
-          return err
-        }
-        issetTotalByteSize = true
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 3:
-      if fieldTypeId == thrift.I64 {
-        if err := p.ReadField3(ctx, iprot); err != nil {
-          return err
-        }
-        issetNumRows = true
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 4:
-      if fieldTypeId == thrift.LIST {
-        if err := p.ReadField4(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 5:
-      if fieldTypeId == thrift.I64 {
-        if err := p.ReadField5(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 6:
-      if fieldTypeId == thrift.I64 {
-        if err := p.ReadField6(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 7:
-      if fieldTypeId == thrift.I16 {
-        if err := p.ReadField7(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    default:
-      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-        return err
-      }
-    }
-    if err := iprot.ReadFieldEnd(ctx); err != nil {
-      return err
-    }
-  }
-  if err := iprot.ReadStructEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
-  }
-  if !issetColumns{
-    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Columns is not set"));
-  }
-  if !issetTotalByteSize{
-    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalByteSize is not set"));
-  }
-  if !issetNumRows{
-    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set"));
-  }
-  return nil
-}
-
-func (p *RowGroup) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
-  _, size, err := iprot.ReadListBegin(ctx)
-  if err != nil {
-    return thrift.PrependError("error reading list begin: ", err)
-  }
-  tSlice := make([]*ColumnChunk, 0, size)
-  p.Columns = tSlice
-  for i := 0; i < size; i ++ {
-    _elem10 := &ColumnChunk{}
-    if err := _elem10.Read(ctx, iprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem10), err)
-    }
-    p.Columns = append(p.Columns, _elem10)
-  }
-  if err := iprot.ReadListEnd(ctx); err != nil {
-    return thrift.PrependError("error reading list end: ", err)
-  }
-  return nil
-}
-
-func (p *RowGroup) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadI64(ctx); err != nil {
-  return thrift.PrependError("error reading field 2: ", err)
-} else {
-  p.TotalByteSize = v
-}
-  return nil
-}
-
-func (p *RowGroup) ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadI64(ctx); err != nil {
-  return thrift.PrependError("error reading field 3: ", err)
-} else {
-  p.NumRows = v
-}
-  return nil
-}
-
-func (p *RowGroup) ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
-  _, size, err := iprot.ReadListBegin(ctx)
-  if err != nil {
-    return thrift.PrependError("error reading list begin: ", err)
-  }
-  tSlice := make([]*SortingColumn, 0, size)
-  p.SortingColumns = tSlice
-  for i := 0; i < size; i ++ {
-    _elem11 := &SortingColumn{}
-    if err := _elem11.Read(ctx, iprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem11), err)
-    }
-    p.SortingColumns = append(p.SortingColumns, _elem11)
-  }
-  if err := iprot.ReadListEnd(ctx); err != nil {
-    return thrift.PrependError("error reading list end: ", err)
-  }
-  return nil
-}
-
-func (p *RowGroup) ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadI64(ctx); err != nil {
-  return thrift.PrependError("error reading field 5: ", err)
-} else {
-  p.FileOffset = &v
-}
-  return nil
-}
-
-func (p *RowGroup) ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadI64(ctx); err != nil {
-  return thrift.PrependError("error reading field 6: ", err)
-} else {
-  p.TotalCompressedSize = &v
-}
-  return nil
-}
-
-func (p *RowGroup) ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadI16(ctx); err != nil {
-  return thrift.PrependError("error reading field 7: ", err)
-} else {
-  p.Ordinal = &v
-}
-  return nil
+	if _, err := iprot.ReadStructBegin(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+	}
+
+	var issetColumns bool = false
+	var issetTotalByteSize bool = false
+	var issetNumRows bool = false
+
+	for {
+		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+		if err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+		}
+		if fieldTypeId == thrift.STOP {
+			break
+		}
+		switch fieldId {
+		case 1:
+			if fieldTypeId == thrift.LIST {
+				if err := p.ReadField1(ctx, iprot); err != nil {
+					return err
+				}
+				issetColumns = true
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 2:
+			if fieldTypeId == thrift.I64 {
+				if err := p.ReadField2(ctx, iprot); err != nil {
+					return err
+				}
+				issetTotalByteSize = true
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 3:
+			if fieldTypeId == thrift.I64 {
+				if err := p.ReadField3(ctx, iprot); err != nil {
+					return err
+				}
+				issetNumRows = true
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 4:
+			if fieldTypeId == thrift.LIST {
+				if err := p.ReadField4(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 5:
+			if fieldTypeId == thrift.I64 {
+				if err := p.ReadField5(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 6:
+			if fieldTypeId == thrift.I64 {
+				if err := p.ReadField6(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 7:
+			if fieldTypeId == thrift.I16 {
+				if err := p.ReadField7(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		default:
+			if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+				return err
+			}
+		}
+		if err := iprot.ReadFieldEnd(ctx); err != nil {
+			return err
+		}
+	}
+	if err := iprot.ReadStructEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+	}
+	if !issetColumns {
+		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Columns is not set"))
+	}
+	if !issetTotalByteSize {
+		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalByteSize is not set"))
+	}
+	if !issetNumRows {
+		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set"))
+	}
+	return nil
+}
+
+func (p *RowGroup) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+	_, size, err := iprot.ReadListBegin(ctx)
+	if err != nil {
+		return thrift.PrependError("error reading list begin: ", err)
+	}
+	tSlice := make([]*ColumnChunk, 0, size)
+	p.Columns = tSlice
+	for i := 0; i < size; i++ {
+		_elem10 := &ColumnChunk{}
+		if err := _elem10.Read(ctx, iprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem10), err)
+		}
+		p.Columns = append(p.Columns, _elem10)
+	}
+	if err := iprot.ReadListEnd(ctx); err != nil {
+		return thrift.PrependError("error reading list end: ", err)
+	}
+	return nil
+}
+
+func (p *RowGroup) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadI64(ctx); err != nil {
+		return thrift.PrependError("error reading field 2: ", err)
+	} else {
+		p.TotalByteSize = v
+	}
+	return nil
+}
+
+func (p *RowGroup) ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadI64(ctx); err != nil {
+		return thrift.PrependError("error reading field 3: ", err)
+	} else {
+		p.NumRows = v
+	}
+	return nil
+}
+
+func (p *RowGroup) ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+	_, size, err := iprot.ReadListBegin(ctx)
+	if err != nil {
+		return thrift.PrependError("error reading list begin: ", err)
+	}
+	tSlice := make([]*SortingColumn, 0, size)
+	p.SortingColumns = tSlice
+	for i := 0; i < size; i++ {
+		_elem11 := &SortingColumn{}
+		if err := _elem11.Read(ctx, iprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem11), err)
+		}
+		p.SortingColumns = append(p.SortingColumns, _elem11)
+	}
+	if err := iprot.ReadListEnd(ctx); err != nil {
+		return thrift.PrependError("error reading list end: ", err)
+	}
+	return nil
+}
+
+func (p *RowGroup) ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadI64(ctx); err != nil {
+		return thrift.PrependError("error reading field 5: ", err)
+	} else {
+		p.FileOffset = &v
+	}
+	return nil
+}
+
+func (p *RowGroup) ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadI64(ctx); err != nil {
+		return thrift.PrependError("error reading field 6: ", err)
+	} else {
+		p.TotalCompressedSize = &v
+	}
+	return nil
+}
+
+func (p *RowGroup) ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadI16(ctx); err != nil {
+		return thrift.PrependError("error reading field 7: ", err)
+	} else {
+		p.Ordinal = &v
+	}
+	return nil
 }

 func (p *RowGroup) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if err := oprot.WriteStructBegin(ctx, "RowGroup"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-    if err := p.writeField1(ctx, oprot); err != nil { return err }
-    if err := p.writeField2(ctx, oprot); err != nil { return err }
-    if err := p.writeField3(ctx, oprot); err != nil { return err }
-    if err := p.writeField4(ctx, oprot); err != nil { return err }
-    if err := p.writeField5(ctx, oprot); err != nil { return err }
-    if err := p.writeField6(ctx, oprot); err != nil { return err }
-    if err := p.writeField7(ctx, oprot); err != nil { return err }
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if err := oprot.WriteStructBegin(ctx, "RowGroup"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+		if err := p.writeField1(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField2(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField3(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField4(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField5(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField6(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField7(ctx, oprot); err != nil {
+			return err
+		}
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }

 func (p *RowGroup) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if err := oprot.WriteFieldBegin(ctx, "columns", thrift.LIST, 1); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:columns: ", p), err) }
-  if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.Columns)); err != nil {
-    return thrift.PrependError("error writing list begin: ", err)
-  }
-  for _, v := range p.Columns {
-    if err := v.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
-    }
-  }
-  if err := oprot.WriteListEnd(ctx); err != nil {
-    return thrift.PrependError("error writing list end: ", err)
-  }
-  if err := oprot.WriteFieldEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:columns: ", p), err) }
-  return err
+	if err := oprot.WriteFieldBegin(ctx, "columns", thrift.LIST, 1); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:columns: ", p), err)
+	}
+	if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.Columns)); err != nil {
+		return thrift.PrependError("error writing list begin: ", err)
+	}
+	for _, v := range p.Columns {
+		if err := v.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+		}
+	}
+	if err := oprot.WriteListEnd(ctx); err != nil {
+		return thrift.PrependError("error writing list end: ", err)
+	}
+	if err := oprot.WriteFieldEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field end error 1:columns: ", p), err)
+	}
+	return err
 }

 func (p
*RowGroup) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "total_byte_size", thrift.I64, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:total_byte_size: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.TotalByteSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_byte_size (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:total_byte_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "total_byte_size", thrift.I64, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:total_byte_size: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.TotalByteSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.total_byte_size (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:total_byte_size: ", p), err) + } + return err } func (p *RowGroup) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.NumRows)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.NumRows)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) + } + return err } func (p *RowGroup) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetSortingColumns() { - if err := oprot.WriteFieldBegin(ctx, "sorting_columns", thrift.LIST, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:sorting_columns: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.SortingColumns)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.SortingColumns { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:sorting_columns: ", p), err) } - } - return err + if p.IsSetSortingColumns() { + if err := oprot.WriteFieldBegin(ctx, "sorting_columns", thrift.LIST, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:sorting_columns: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.SortingColumns)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.SortingColumns { + if 
err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:sorting_columns: ", p), err) + } + } + return err } func (p *RowGroup) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetFileOffset() { - if err := oprot.WriteFieldBegin(ctx, "file_offset", thrift.I64, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:file_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.FileOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.file_offset (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:file_offset: ", p), err) } - } - return err + if p.IsSetFileOffset() { + if err := oprot.WriteFieldBegin(ctx, "file_offset", thrift.I64, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:file_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.FileOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.file_offset (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:file_offset: ", p), err) + } + } + return err } func (p *RowGroup) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetTotalCompressedSize() { - if err := oprot.WriteFieldBegin(ctx, "total_compressed_size", thrift.I64, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_compressed_size: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.TotalCompressedSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_compressed_size: ", p), err) } - } - return err + if p.IsSetTotalCompressedSize() { + if err := oprot.WriteFieldBegin(ctx, "total_compressed_size", thrift.I64, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_compressed_size: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.TotalCompressedSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_compressed_size: ", p), err) + } + } + return err } func (p *RowGroup) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetOrdinal() { - if err := oprot.WriteFieldBegin(ctx, "ordinal", thrift.I16, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:ordinal: ", p), err) } - if err := oprot.WriteI16(ctx, int16(*p.Ordinal)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.ordinal (7) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:ordinal: ", p), err) } - } - return err + if p.IsSetOrdinal() { + if err := oprot.WriteFieldBegin(ctx, "ordinal", thrift.I16, 
7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:ordinal: ", p), err) + } + if err := oprot.WriteI16(ctx, int16(*p.Ordinal)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.ordinal (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:ordinal: ", p), err) + } + } + return err } func (p *RowGroup) Equals(other *RowGroup) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if len(p.Columns) != len(other.Columns) { return false } - for i, _tgt := range p.Columns { - _src12 := other.Columns[i] - if !_tgt.Equals(_src12) { return false } - } - if p.TotalByteSize != other.TotalByteSize { return false } - if p.NumRows != other.NumRows { return false } - if len(p.SortingColumns) != len(other.SortingColumns) { return false } - for i, _tgt := range p.SortingColumns { - _src13 := other.SortingColumns[i] - if !_tgt.Equals(_src13) { return false } - } - if p.FileOffset != other.FileOffset { - if p.FileOffset == nil || other.FileOffset == nil { - return false - } - if (*p.FileOffset) != (*other.FileOffset) { return false } - } - if p.TotalCompressedSize != other.TotalCompressedSize { - if p.TotalCompressedSize == nil || other.TotalCompressedSize == nil { - return false - } - if (*p.TotalCompressedSize) != (*other.TotalCompressedSize) { return false } - } - if p.Ordinal != other.Ordinal { - if p.Ordinal == nil || other.Ordinal == nil { - return false - } - if (*p.Ordinal) != (*other.Ordinal) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if len(p.Columns) != len(other.Columns) { + return false + } + for i, _tgt := range p.Columns { + _src12 := other.Columns[i] + if !_tgt.Equals(_src12) { + return false + } + } + if p.TotalByteSize != other.TotalByteSize { + return false + } + if p.NumRows != other.NumRows { + return false + } + if len(p.SortingColumns) != len(other.SortingColumns) { + return false + } + for i, _tgt := range p.SortingColumns { + _src13 := other.SortingColumns[i] + if !_tgt.Equals(_src13) { + return false + } + } + if p.FileOffset != other.FileOffset { + if p.FileOffset == nil || other.FileOffset == nil { + return false + } + if (*p.FileOffset) != (*other.FileOffset) { + return false + } + } + if p.TotalCompressedSize != other.TotalCompressedSize { + if p.TotalCompressedSize == nil || other.TotalCompressedSize == nil { + return false + } + if (*p.TotalCompressedSize) != (*other.TotalCompressedSize) { + return false + } + } + if p.Ordinal != other.Ordinal { + if p.Ordinal == nil || other.Ordinal == nil { + return false + } + if (*p.Ordinal) != (*other.Ordinal) { + return false + } + } + return true } func (p *RowGroup) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("RowGroup(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("RowGroup(%+v)", *p) } func (p *RowGroup) Validate() error { - return nil + return nil } + // Empty struct to signal the order defined by the physical or logical type type TypeDefinedOrder struct { } func NewTypeDefinedOrder() *TypeDefinedOrder { - return &TypeDefinedOrder{} + return &TypeDefinedOrder{} } func (p *TypeDefinedOrder) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err 
:= iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *TypeDefinedOrder) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "TypeDefinedOrder"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "TypeDefinedOrder"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *TypeDefinedOrder) Equals(other *TypeDefinedOrder) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *TypeDefinedOrder) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TypeDefinedOrder(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("TypeDefinedOrder(%+v)", *p) } func (p *TypeDefinedOrder) Validate() error { - return nil + return nil } + // Union to specify the order used for the min_value and max_value fields for a // column. This union takes the role of an enhanced enum that allows rich // elements (which will be needed for a collation-based ordering in the future). -// +// // Possible values are: -// * TypeDefinedOrder - the column uses the order defined by its logical or -// physical type (if there is no logical type). -// +// - TypeDefinedOrder - the column uses the order defined by its logical or +// physical type (if there is no logical type). +// // If the reader does not support the value of this union, min and max stats // for this column should be ignored. 
-// +// // Attributes: -// - TYPE_ORDER: The sort orders for logical types are: -// UTF8 - unsigned byte-wise comparison -// INT8 - signed comparison -// INT16 - signed comparison -// INT32 - signed comparison -// INT64 - signed comparison -// UINT8 - unsigned comparison -// UINT16 - unsigned comparison -// UINT32 - unsigned comparison -// UINT64 - unsigned comparison -// DECIMAL - signed comparison of the represented value -// DATE - signed comparison -// TIME_MILLIS - signed comparison -// TIME_MICROS - signed comparison -// TIMESTAMP_MILLIS - signed comparison -// TIMESTAMP_MICROS - signed comparison -// INTERVAL - unsigned comparison -// JSON - unsigned byte-wise comparison -// BSON - unsigned byte-wise comparison -// ENUM - unsigned byte-wise comparison -// LIST - undefined -// MAP - undefined -// +// - TYPE_ORDER: The sort orders for logical types are: +// UTF8 - unsigned byte-wise comparison +// INT8 - signed comparison +// INT16 - signed comparison +// INT32 - signed comparison +// INT64 - signed comparison +// UINT8 - unsigned comparison +// UINT16 - unsigned comparison +// UINT32 - unsigned comparison +// UINT64 - unsigned comparison +// DECIMAL - signed comparison of the represented value +// DATE - signed comparison +// TIME_MILLIS - signed comparison +// TIME_MICROS - signed comparison +// TIMESTAMP_MILLIS - signed comparison +// TIMESTAMP_MICROS - signed comparison +// INTERVAL - unsigned comparison +// JSON - unsigned byte-wise comparison +// BSON - unsigned byte-wise comparison +// ENUM - unsigned byte-wise comparison +// LIST - undefined +// MAP - undefined +// // In the absence of logical types, the sort order is determined by the physical type: -// BOOLEAN - false, true -// INT32 - signed comparison -// INT64 - signed comparison -// INT96 (only used for legacy timestamps) - undefined -// FLOAT - signed comparison of the represented value (*) -// DOUBLE - signed comparison of the represented value (*) -// BYTE_ARRAY - unsigned byte-wise comparison -// FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison -// +// +// BOOLEAN - false, true +// INT32 - signed comparison +// INT64 - signed comparison +// INT96 (only used for legacy timestamps) - undefined +// FLOAT - signed comparison of the represented value (*) +// DOUBLE - signed comparison of the represented value (*) +// BYTE_ARRAY - unsigned byte-wise comparison +// FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison +// // (*) Because the sorting order is not specified properly for floating -// point values (relations vs. total ordering) the following -// compatibility rules should be applied when reading statistics: -// - If the min is a NaN, it should be ignored. -// - If the max is a NaN, it should be ignored. -// - If the min is +0, the row group may contain -0 values as well. -// - If the max is -0, the row group may contain +0 values as well. -// - When looking for NaN values, min and max should be ignored. -// -// When writing statistics the following rules should be followed: -// - NaNs should not be written to min or max statistics fields. -// - If the computed max value is zero (whether negative or positive), -// `+0.0` should be written into the max statistics field. -// - If the computed min value is zero (whether negative or positive), -// `-0.0` should be written into the min statistics field. +// +// point values (relations vs. total ordering) the following +// compatibility rules should be applied when reading statistics: +// - If the min is a NaN, it should be ignored. 
+// - If the max is a NaN, it should be ignored. +// - If the min is +0, the row group may contain -0 values as well. +// - If the max is -0, the row group may contain +0 values as well. +// - When looking for NaN values, min and max should be ignored. +// +// When writing statistics the following rules should be followed: +// - NaNs should not be written to min or max statistics fields. +// - If the computed max value is zero (whether negative or positive), +// `+0.0` should be written into the max statistics field. +// - If the computed min value is zero (whether negative or positive), +// `-0.0` should be written into the min statistics field. type ColumnOrder struct { - TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" db:"TYPE_ORDER" json:"TYPE_ORDER,omitempty"` + TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" db:"TYPE_ORDER" json:"TYPE_ORDER,omitempty"` } func NewColumnOrder() *ColumnOrder { - return &ColumnOrder{} + return &ColumnOrder{} } var ColumnOrder_TYPE_ORDER_DEFAULT *TypeDefinedOrder + func (p *ColumnOrder) GetTYPE_ORDER() *TypeDefinedOrder { - if !p.IsSetTYPE_ORDER() { - return ColumnOrder_TYPE_ORDER_DEFAULT - } -return p.TYPE_ORDER + if !p.IsSetTYPE_ORDER() { + return ColumnOrder_TYPE_ORDER_DEFAULT + } + return p.TYPE_ORDER } func (p *ColumnOrder) CountSetFieldsColumnOrder() int { - count := 0 - if (p.IsSetTYPE_ORDER()) { - count++ - } - return count + count := 0 + if p.IsSetTYPE_ORDER() { + count++ + } + return count } func (p *ColumnOrder) IsSetTYPE_ORDER() bool { - return p.TYPE_ORDER != nil + return p.TYPE_ORDER != nil } func (p *ColumnOrder) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *ColumnOrder) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.TYPE_ORDER = &TypeDefinedOrder{} - if err := p.TYPE_ORDER.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TYPE_ORDER), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } 
+ } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *ColumnOrder) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.TYPE_ORDER = &TypeDefinedOrder{} + if err := p.TYPE_ORDER.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TYPE_ORDER), err) + } + return nil } func (p *ColumnOrder) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsColumnOrder(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "ColumnOrder"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsColumnOrder(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "ColumnOrder"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *ColumnOrder) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetTYPE_ORDER() { - if err := oprot.WriteFieldBegin(ctx, "TYPE_ORDER", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:TYPE_ORDER: ", p), err) } - if err := p.TYPE_ORDER.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TYPE_ORDER), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:TYPE_ORDER: ", p), err) } - } - return err + if p.IsSetTYPE_ORDER() { + if err := oprot.WriteFieldBegin(ctx, "TYPE_ORDER", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:TYPE_ORDER: ", p), err) + } + if err := p.TYPE_ORDER.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TYPE_ORDER), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:TYPE_ORDER: ", p), err) + } + } + return err } func (p *ColumnOrder) Equals(other *ColumnOrder) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.TYPE_ORDER.Equals(other.TYPE_ORDER) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.TYPE_ORDER.Equals(other.TYPE_ORDER) { + return false + } + return true } func (p *ColumnOrder) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnOrder(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("ColumnOrder(%+v)", *p) } func (p *ColumnOrder) Validate() 
error { - return nil + return nil } + // Attributes: -// - Offset: Offset of the page in the file * -// - CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header +// - Offset: Offset of the page in the file * +// - CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header +// // length -// - FirstRowIndex: Index within the RowGroup of the first row of the page; this means pages +// - FirstRowIndex: Index within the RowGroup of the first row of the page; this means pages +// // change on record boundaries (r = 0). type PageLocation struct { - Offset int64 `thrift:"offset,1,required" db:"offset" json:"offset"` - CompressedPageSize int32 `thrift:"compressed_page_size,2,required" db:"compressed_page_size" json:"compressed_page_size"` - FirstRowIndex int64 `thrift:"first_row_index,3,required" db:"first_row_index" json:"first_row_index"` + Offset int64 `thrift:"offset,1,required" db:"offset" json:"offset"` + CompressedPageSize int32 `thrift:"compressed_page_size,2,required" db:"compressed_page_size" json:"compressed_page_size"` + FirstRowIndex int64 `thrift:"first_row_index,3,required" db:"first_row_index" json:"first_row_index"` } func NewPageLocation() *PageLocation { - return &PageLocation{} + return &PageLocation{} } - func (p *PageLocation) GetOffset() int64 { - return p.Offset + return p.Offset } func (p *PageLocation) GetCompressedPageSize() int32 { - return p.CompressedPageSize + return p.CompressedPageSize } func (p *PageLocation) GetFirstRowIndex() int64 { - return p.FirstRowIndex + return p.FirstRowIndex } func (p *PageLocation) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetOffset bool = false; - var issetCompressedPageSize bool = false; - var issetFirstRowIndex bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I64 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetOffset = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetCompressedPageSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I64 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetFirstRowIndex = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetOffset{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Offset is not set")); - } - if !issetCompressedPageSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")); - } - if !issetFirstRowIndex{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, 
fmt.Errorf("Required field FirstRowIndex is not set")); - } - return nil -} - -func (p *PageLocation) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.Offset = v -} - return nil -} - -func (p *PageLocation) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.CompressedPageSize = v -} - return nil -} - -func (p *PageLocation) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.FirstRowIndex = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetOffset bool = false + var issetCompressedPageSize bool = false + var issetFirstRowIndex bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I64 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetOffset = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetCompressedPageSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I64 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetFirstRowIndex = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetOffset { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Offset is not set")) + } + if !issetCompressedPageSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")) + } + if !issetFirstRowIndex { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FirstRowIndex is not set")) + } + return nil +} + +func (p *PageLocation) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Offset = v + } + return nil +} + +func (p *PageLocation) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.CompressedPageSize = v + } + return nil +} + +func (p *PageLocation) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.FirstRowIndex = v + } + return nil } func (p *PageLocation) Write(ctx context.Context, 
oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "PageLocation"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "PageLocation"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *PageLocation) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "offset", thrift.I64, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.Offset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.offset (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:offset: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "offset", thrift.I64, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.Offset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.offset (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:offset: ", p), err) + } + return err } func (p *PageLocation) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "compressed_page_size", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:compressed_page_size: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.CompressedPageSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:compressed_page_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "compressed_page_size", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:compressed_page_size: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.CompressedPageSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:compressed_page_size: ", p), err) + } + return err } func (p 
*PageLocation) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "first_row_index", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:first_row_index: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.FirstRowIndex)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.first_row_index (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:first_row_index: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "first_row_index", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:first_row_index: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.FirstRowIndex)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.first_row_index (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:first_row_index: ", p), err) + } + return err } func (p *PageLocation) Equals(other *PageLocation) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Offset != other.Offset { return false } - if p.CompressedPageSize != other.CompressedPageSize { return false } - if p.FirstRowIndex != other.FirstRowIndex { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Offset != other.Offset { + return false + } + if p.CompressedPageSize != other.CompressedPageSize { + return false + } + if p.FirstRowIndex != other.FirstRowIndex { + return false + } + return true } func (p *PageLocation) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("PageLocation(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("PageLocation(%+v)", *p) } func (p *PageLocation) Validate() error { - return nil + return nil } + // Attributes: -// - PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required +// - PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required +// // that page_locations[i].first_row_index < page_locations[i+1].first_row_index. 
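A hedged aside, not part of this diff: the doc comment above pins down the OffsetIndex invariant — page_locations is ordered by increasing offset, and page_locations[i].first_row_index < page_locations[i+1].first_row_index. A minimal sketch of how a reader might check that invariant, assuming the PageLocation type and fmt import from this file; the helper name validatePageLocationOrder is hypothetical, not part of the generated API:

// validatePageLocationOrder is a hypothetical helper, not generated code.
// It checks the invariant documented above: offsets increase from page to
// page, and FirstRowIndex is strictly increasing between consecutive pages.
func validatePageLocationOrder(locs []*PageLocation) error {
	for i := 1; i < len(locs); i++ {
		if locs[i].Offset <= locs[i-1].Offset {
			return fmt.Errorf("page_locations not ordered by increasing offset at index %d", i)
		}
		if locs[i].FirstRowIndex <= locs[i-1].FirstRowIndex {
			return fmt.Errorf("first_row_index not strictly increasing at index %d", i)
		}
	}
	return nil
}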
type OffsetIndex struct { - PageLocations []*PageLocation `thrift:"page_locations,1,required" db:"page_locations" json:"page_locations"` + PageLocations []*PageLocation `thrift:"page_locations,1,required" db:"page_locations" json:"page_locations"` } func NewOffsetIndex() *OffsetIndex { - return &OffsetIndex{} + return &OffsetIndex{} } - func (p *OffsetIndex) GetPageLocations() []*PageLocation { - return p.PageLocations + return p.PageLocations } func (p *OffsetIndex) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetPageLocations bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.LIST { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetPageLocations = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetPageLocations{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageLocations is not set")); - } - return nil -} - -func (p *OffsetIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*PageLocation, 0, size) - p.PageLocations = tSlice - for i := 0; i < size; i ++ { - _elem14 := &PageLocation{} - if err := _elem14.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem14), err) - } - p.PageLocations = append(p.PageLocations, _elem14) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetPageLocations bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.LIST { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetPageLocations = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetPageLocations { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageLocations is not set")) + } + return nil +} + +func (p *OffsetIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { 
+ return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*PageLocation, 0, size) + p.PageLocations = tSlice + for i := 0; i < size; i++ { + _elem14 := &PageLocation{} + if err := _elem14.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem14), err) + } + p.PageLocations = append(p.PageLocations, _elem14) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil } func (p *OffsetIndex) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "OffsetIndex"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "OffsetIndex"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *OffsetIndex) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "page_locations", thrift.LIST, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_locations: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.PageLocations)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.PageLocations { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_locations: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "page_locations", thrift.LIST, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_locations: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.PageLocations)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.PageLocations { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_locations: ", p), err) + } + return err } func (p *OffsetIndex) Equals(other *OffsetIndex) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if len(p.PageLocations) != len(other.PageLocations) { return false } - for i, _tgt := range p.PageLocations { - _src15 := 
other.PageLocations[i] - if !_tgt.Equals(_src15) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if len(p.PageLocations) != len(other.PageLocations) { + return false + } + for i, _tgt := range p.PageLocations { + _src15 := other.PageLocations[i] + if !_tgt.Equals(_src15) { + return false + } + } + return true } func (p *OffsetIndex) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("OffsetIndex(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("OffsetIndex(%+v)", *p) } func (p *OffsetIndex) Validate() error { - return nil + return nil } + // Description for ColumnIndex. // Each [i] refers to the page at OffsetIndex.page_locations[i] -// +// // Attributes: -// - NullPages: A list of Boolean values to determine the validity of the corresponding +// - NullPages: A list of Boolean values to determine the validity of the corresponding +// // min and max values. If true, a page contains only null values, and writers // have to set the corresponding entries in min_values and max_values to // byte[0], so that all lists have the same length. If false, the // corresponding entries in min_values and max_values must be valid. -// - MinValues: Two lists containing lower and upper bounds for the values of each page +// - MinValues: Two lists containing lower and upper bounds for the values of each page +// // determined by the ColumnOrder of the column. These may be the actual // minimum and maximum values found on a page, but can also be (more compact) // values that do not exist on a page. For example, instead of storing ""Blart @@ -9528,1722 +10840,1957 @@ func (p *OffsetIndex) Validate() error { // Such more compact values must still be valid values within the column's // logical type. Readers must make sure that list entries are populated before // using them by inspecting null_pages. -// - MaxValues -// - BoundaryOrder: Stores whether both min_values and max_values are ordered and if so, in +// - MaxValues +// - BoundaryOrder: Stores whether both min_values and max_values are ordered and if so, in +// // which direction. This allows readers to perform binary searches in both // lists. Readers cannot assume that max_values[i] <= min_values[i+1], even // if the lists are ordered. 
-// - NullCounts: A list containing the number of null values for each page * +// - NullCounts: A list containing the number of null values for each page * type ColumnIndex struct { - NullPages []bool `thrift:"null_pages,1,required" db:"null_pages" json:"null_pages"` - MinValues [][]byte `thrift:"min_values,2,required" db:"min_values" json:"min_values"` - MaxValues [][]byte `thrift:"max_values,3,required" db:"max_values" json:"max_values"` - BoundaryOrder BoundaryOrder `thrift:"boundary_order,4,required" db:"boundary_order" json:"boundary_order"` - NullCounts []int64 `thrift:"null_counts,5" db:"null_counts" json:"null_counts,omitempty"` + NullPages []bool `thrift:"null_pages,1,required" db:"null_pages" json:"null_pages"` + MinValues [][]byte `thrift:"min_values,2,required" db:"min_values" json:"min_values"` + MaxValues [][]byte `thrift:"max_values,3,required" db:"max_values" json:"max_values"` + BoundaryOrder BoundaryOrder `thrift:"boundary_order,4,required" db:"boundary_order" json:"boundary_order"` + NullCounts []int64 `thrift:"null_counts,5" db:"null_counts" json:"null_counts,omitempty"` } func NewColumnIndex() *ColumnIndex { - return &ColumnIndex{} + return &ColumnIndex{} } - func (p *ColumnIndex) GetNullPages() []bool { - return p.NullPages + return p.NullPages } func (p *ColumnIndex) GetMinValues() [][]byte { - return p.MinValues + return p.MinValues } func (p *ColumnIndex) GetMaxValues() [][]byte { - return p.MaxValues + return p.MaxValues } func (p *ColumnIndex) GetBoundaryOrder() BoundaryOrder { - return p.BoundaryOrder + return p.BoundaryOrder } + var ColumnIndex_NullCounts_DEFAULT []int64 func (p *ColumnIndex) GetNullCounts() []int64 { - return p.NullCounts + return p.NullCounts } func (p *ColumnIndex) IsSetNullCounts() bool { - return p.NullCounts != nil + return p.NullCounts != nil } func (p *ColumnIndex) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNullPages bool = false; - var issetMinValues bool = false; - var issetMaxValues bool = false; - var issetBoundaryOrder bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.LIST { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetNullPages = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.LIST { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetMinValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.LIST { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetMaxValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I32 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetBoundaryOrder = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.LIST { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := 
iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNullPages{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullPages is not set")); - } - if !issetMinValues{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MinValues is not set")); - } - if !issetMaxValues{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MaxValues is not set")); - } - if !issetBoundaryOrder{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BoundaryOrder is not set")); - } - return nil -} - -func (p *ColumnIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]bool, 0, size) - p.NullPages = tSlice - for i := 0; i < size; i ++ { -var _elem16 bool - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem16 = v -} - p.NullPages = append(p.NullPages, _elem16) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnIndex) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([][]byte, 0, size) - p.MinValues = tSlice - for i := 0; i < size; i ++ { -var _elem17 []byte - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem17 = v -} - p.MinValues = append(p.MinValues, _elem17) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnIndex) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([][]byte, 0, size) - p.MaxValues = tSlice - for i := 0; i < size; i ++ { -var _elem18 []byte - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem18 = v -} - p.MaxValues = append(p.MaxValues, _elem18) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnIndex) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - temp := BoundaryOrder(v) - p.BoundaryOrder = temp -} - return nil -} - -func (p *ColumnIndex) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]int64, 0, size) - p.NullCounts = tSlice - for i := 0; i < size; i ++ { -var _elem19 int64 - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem19 = v -} - 
p.NullCounts = append(p.NullCounts, _elem19) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNullPages bool = false + var issetMinValues bool = false + var issetMaxValues bool = false + var issetBoundaryOrder bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.LIST { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetNullPages = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.LIST { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetMinValues = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.LIST { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetMaxValues = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I32 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetBoundaryOrder = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.LIST { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNullPages { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullPages is not set")) + } + if !issetMinValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MinValues is not set")) + } + if !issetMaxValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MaxValues is not set")) + } + if !issetBoundaryOrder { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BoundaryOrder is not set")) + } + return nil +} + +func (p *ColumnIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]bool, 0, size) + p.NullPages = tSlice + for i := 0; i < size; i++ { + var _elem16 bool + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem16 = v + } + p.NullPages = append(p.NullPages, _elem16) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnIndex) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice 
:= make([][]byte, 0, size) + p.MinValues = tSlice + for i := 0; i < size; i++ { + var _elem17 []byte + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem17 = v + } + p.MinValues = append(p.MinValues, _elem17) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnIndex) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([][]byte, 0, size) + p.MaxValues = tSlice + for i := 0; i < size; i++ { + var _elem18 []byte + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem18 = v + } + p.MaxValues = append(p.MaxValues, _elem18) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnIndex) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + temp := BoundaryOrder(v) + p.BoundaryOrder = temp + } + return nil +} + +func (p *ColumnIndex) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]int64, 0, size) + p.NullCounts = tSlice + for i := 0; i < size; i++ { + var _elem19 int64 + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem19 = v + } + p.NullCounts = append(p.NullCounts, _elem19) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil } func (p *ColumnIndex) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "ColumnIndex"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "ColumnIndex"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write 
struct stop error: ", err) + } + return nil } func (p *ColumnIndex) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "null_pages", thrift.LIST, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:null_pages: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.BOOL, len(p.NullPages)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.NullPages { - if err := oprot.WriteBool(ctx, bool(v)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:null_pages: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "null_pages", thrift.LIST, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:null_pages: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.BOOL, len(p.NullPages)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.NullPages { + if err := oprot.WriteBool(ctx, bool(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:null_pages: ", p), err) + } + return err } func (p *ColumnIndex) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "min_values", thrift.LIST, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min_values: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.MinValues)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.MinValues { - if err := oprot.WriteBinary(ctx, v); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "min_values", thrift.LIST, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min_values: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.MinValues)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.MinValues { + if err := oprot.WriteBinary(ctx, v); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min_values: ", p), err) + } + return err } func (p *ColumnIndex) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "max_values", thrift.LIST, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:max_values: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.MaxValues)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.MaxValues { - if err := oprot.WriteBinary(ctx, v); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:max_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "max_values", thrift.LIST, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:max_values: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.MaxValues)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.MaxValues { + if err := oprot.WriteBinary(ctx, v); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:max_values: ", p), err) + } + return err } func (p *ColumnIndex) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "boundary_order", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:boundary_order: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.BoundaryOrder)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.boundary_order (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:boundary_order: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "boundary_order", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:boundary_order: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.BoundaryOrder)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.boundary_order (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:boundary_order: ", p), err) + } + return err } func (p *ColumnIndex) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetNullCounts() { - if err := oprot.WriteFieldBegin(ctx, "null_counts", thrift.LIST, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:null_counts: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.I64, len(p.NullCounts)); err != nil { - return thrift.PrependError("error writing list 
begin: ", err) - } - for _, v := range p.NullCounts { - if err := oprot.WriteI64(ctx, int64(v)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:null_counts: ", p), err) } - } - return err + if p.IsSetNullCounts() { + if err := oprot.WriteFieldBegin(ctx, "null_counts", thrift.LIST, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:null_counts: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.I64, len(p.NullCounts)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.NullCounts { + if err := oprot.WriteI64(ctx, int64(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:null_counts: ", p), err) + } + } + return err } func (p *ColumnIndex) Equals(other *ColumnIndex) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if len(p.NullPages) != len(other.NullPages) { return false } - for i, _tgt := range p.NullPages { - _src20 := other.NullPages[i] - if _tgt != _src20 { return false } - } - if len(p.MinValues) != len(other.MinValues) { return false } - for i, _tgt := range p.MinValues { - _src21 := other.MinValues[i] - if bytes.Compare(_tgt, _src21) != 0 { return false } - } - if len(p.MaxValues) != len(other.MaxValues) { return false } - for i, _tgt := range p.MaxValues { - _src22 := other.MaxValues[i] - if bytes.Compare(_tgt, _src22) != 0 { return false } - } - if p.BoundaryOrder != other.BoundaryOrder { return false } - if len(p.NullCounts) != len(other.NullCounts) { return false } - for i, _tgt := range p.NullCounts { - _src23 := other.NullCounts[i] - if _tgt != _src23 { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if len(p.NullPages) != len(other.NullPages) { + return false + } + for i, _tgt := range p.NullPages { + _src20 := other.NullPages[i] + if _tgt != _src20 { + return false + } + } + if len(p.MinValues) != len(other.MinValues) { + return false + } + for i, _tgt := range p.MinValues { + _src21 := other.MinValues[i] + if bytes.Compare(_tgt, _src21) != 0 { + return false + } + } + if len(p.MaxValues) != len(other.MaxValues) { + return false + } + for i, _tgt := range p.MaxValues { + _src22 := other.MaxValues[i] + if bytes.Compare(_tgt, _src22) != 0 { + return false + } + } + if p.BoundaryOrder != other.BoundaryOrder { + return false + } + if len(p.NullCounts) != len(other.NullCounts) { + return false + } + for i, _tgt := range p.NullCounts { + _src23 := other.NullCounts[i] + if _tgt != _src23 { + return false + } + } + return true } func (p *ColumnIndex) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnIndex(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("ColumnIndex(%+v)", *p) } func (p *ColumnIndex) Validate() error { - return nil + return nil } + // Attributes: -// - AadPrefix: AAD prefix * -// - AadFileUnique: Unique file identifier part of AAD 
suffix * -// - SupplyAadPrefix: In files encrypted with AAD prefix without storing it, +// - AadPrefix: AAD prefix * +// - AadFileUnique: Unique file identifier part of AAD suffix * +// - SupplyAadPrefix: In files encrypted with AAD prefix without storing it, +// // readers must supply the prefix * type AesGcmV1 struct { - AadPrefix []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"` - AadFileUnique []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"` - SupplyAadPrefix *bool `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"` + AadPrefix []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"` + AadFileUnique []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"` + SupplyAadPrefix *bool `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"` } func NewAesGcmV1() *AesGcmV1 { - return &AesGcmV1{} + return &AesGcmV1{} } var AesGcmV1_AadPrefix_DEFAULT []byte func (p *AesGcmV1) GetAadPrefix() []byte { - return p.AadPrefix + return p.AadPrefix } + var AesGcmV1_AadFileUnique_DEFAULT []byte func (p *AesGcmV1) GetAadFileUnique() []byte { - return p.AadFileUnique + return p.AadFileUnique } + var AesGcmV1_SupplyAadPrefix_DEFAULT bool + func (p *AesGcmV1) GetSupplyAadPrefix() bool { - if !p.IsSetSupplyAadPrefix() { - return AesGcmV1_SupplyAadPrefix_DEFAULT - } -return *p.SupplyAadPrefix + if !p.IsSetSupplyAadPrefix() { + return AesGcmV1_SupplyAadPrefix_DEFAULT + } + return *p.SupplyAadPrefix } func (p *AesGcmV1) IsSetAadPrefix() bool { - return p.AadPrefix != nil + return p.AadPrefix != nil } func (p *AesGcmV1) IsSetAadFileUnique() bool { - return p.AadFileUnique != nil + return p.AadFileUnique != nil } func (p *AesGcmV1) IsSetSupplyAadPrefix() bool { - return p.SupplyAadPrefix != nil + return p.SupplyAadPrefix != nil } func (p *AesGcmV1) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRING { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRING { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *AesGcmV1) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.AadPrefix = v -} - return nil -} - -func (p *AesGcmV1) 
ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.AadFileUnique = v -} - return nil -} - -func (p *AesGcmV1) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.SupplyAadPrefix = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRING { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRING { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *AesGcmV1) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.AadPrefix = v + } + return nil +} + +func (p *AesGcmV1) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.AadFileUnique = v + } + return nil +} + +func (p *AesGcmV1) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.SupplyAadPrefix = &v + } + return nil } func (p *AesGcmV1) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "AesGcmV1"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "AesGcmV1"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { 
+ return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *AesGcmV1) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAadPrefix() { - if err := oprot.WriteFieldBegin(ctx, "aad_prefix", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:aad_prefix: ", p), err) } - if err := oprot.WriteBinary(ctx, p.AadPrefix); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.aad_prefix (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:aad_prefix: ", p), err) } - } - return err + if p.IsSetAadPrefix() { + if err := oprot.WriteFieldBegin(ctx, "aad_prefix", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:aad_prefix: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.AadPrefix); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.aad_prefix (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:aad_prefix: ", p), err) + } + } + return err } func (p *AesGcmV1) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAadFileUnique() { - if err := oprot.WriteFieldBegin(ctx, "aad_file_unique", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:aad_file_unique: ", p), err) } - if err := oprot.WriteBinary(ctx, p.AadFileUnique); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.aad_file_unique (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:aad_file_unique: ", p), err) } - } - return err + if p.IsSetAadFileUnique() { + if err := oprot.WriteFieldBegin(ctx, "aad_file_unique", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:aad_file_unique: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.AadFileUnique); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.aad_file_unique (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:aad_file_unique: ", p), err) + } + } + return err } func (p *AesGcmV1) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetSupplyAadPrefix() { - if err := oprot.WriteFieldBegin(ctx, "supply_aad_prefix", thrift.BOOL, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:supply_aad_prefix: ", p), err) } - if err := oprot.WriteBool(ctx, bool(*p.SupplyAadPrefix)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.supply_aad_prefix (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:supply_aad_prefix: ", p), err) } - } - return err + if p.IsSetSupplyAadPrefix() { + if err := oprot.WriteFieldBegin(ctx, "supply_aad_prefix", thrift.BOOL, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:supply_aad_prefix: ", p), err) + } + if err := oprot.WriteBool(ctx, 
bool(*p.SupplyAadPrefix)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.supply_aad_prefix (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:supply_aad_prefix: ", p), err) + } + } + return err } func (p *AesGcmV1) Equals(other *AesGcmV1) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if bytes.Compare(p.AadPrefix, other.AadPrefix) != 0 { return false } - if bytes.Compare(p.AadFileUnique, other.AadFileUnique) != 0 { return false } - if p.SupplyAadPrefix != other.SupplyAadPrefix { - if p.SupplyAadPrefix == nil || other.SupplyAadPrefix == nil { - return false - } - if (*p.SupplyAadPrefix) != (*other.SupplyAadPrefix) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if bytes.Compare(p.AadPrefix, other.AadPrefix) != 0 { + return false + } + if bytes.Compare(p.AadFileUnique, other.AadFileUnique) != 0 { + return false + } + if p.SupplyAadPrefix != other.SupplyAadPrefix { + if p.SupplyAadPrefix == nil || other.SupplyAadPrefix == nil { + return false + } + if (*p.SupplyAadPrefix) != (*other.SupplyAadPrefix) { + return false + } + } + return true } func (p *AesGcmV1) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("AesGcmV1(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("AesGcmV1(%+v)", *p) } func (p *AesGcmV1) Validate() error { - return nil + return nil } + // Attributes: -// - AadPrefix: AAD prefix * -// - AadFileUnique: Unique file identifier part of AAD suffix * -// - SupplyAadPrefix: In files encrypted with AAD prefix without storing it, +// - AadPrefix: AAD prefix * +// - AadFileUnique: Unique file identifier part of AAD suffix * +// - SupplyAadPrefix: In files encrypted with AAD prefix without storing it, +// // readers must supply the prefix * type AesGcmCtrV1 struct { - AadPrefix []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"` - AadFileUnique []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"` - SupplyAadPrefix *bool `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"` + AadPrefix []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"` + AadFileUnique []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"` + SupplyAadPrefix *bool `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"` } func NewAesGcmCtrV1() *AesGcmCtrV1 { - return &AesGcmCtrV1{} + return &AesGcmCtrV1{} } var AesGcmCtrV1_AadPrefix_DEFAULT []byte func (p *AesGcmCtrV1) GetAadPrefix() []byte { - return p.AadPrefix + return p.AadPrefix } + var AesGcmCtrV1_AadFileUnique_DEFAULT []byte func (p *AesGcmCtrV1) GetAadFileUnique() []byte { - return p.AadFileUnique + return p.AadFileUnique } + var AesGcmCtrV1_SupplyAadPrefix_DEFAULT bool + func (p *AesGcmCtrV1) GetSupplyAadPrefix() bool { - if !p.IsSetSupplyAadPrefix() { - return AesGcmCtrV1_SupplyAadPrefix_DEFAULT - } -return *p.SupplyAadPrefix + if !p.IsSetSupplyAadPrefix() { + return AesGcmCtrV1_SupplyAadPrefix_DEFAULT + } + return *p.SupplyAadPrefix } func (p *AesGcmCtrV1) IsSetAadPrefix() bool { - return p.AadPrefix != nil + return p.AadPrefix != nil } func (p *AesGcmCtrV1) IsSetAadFileUnique() bool { - return p.AadFileUnique != nil + return p.AadFileUnique != nil } func (p *AesGcmCtrV1) IsSetSupplyAadPrefix() 
bool { - return p.SupplyAadPrefix != nil + return p.SupplyAadPrefix != nil } func (p *AesGcmCtrV1) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRING { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRING { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *AesGcmCtrV1) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.AadPrefix = v -} - return nil -} - -func (p *AesGcmCtrV1) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.AadFileUnique = v -} - return nil -} - -func (p *AesGcmCtrV1) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.SupplyAadPrefix = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRING { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRING { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *AesGcmCtrV1) ReadField1(ctx context.Context, iprot thrift.TProtocol) 
error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.AadPrefix = v + } + return nil +} + +func (p *AesGcmCtrV1) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.AadFileUnique = v + } + return nil +} + +func (p *AesGcmCtrV1) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.SupplyAadPrefix = &v + } + return nil } func (p *AesGcmCtrV1) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "AesGcmCtrV1"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "AesGcmCtrV1"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *AesGcmCtrV1) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAadPrefix() { - if err := oprot.WriteFieldBegin(ctx, "aad_prefix", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:aad_prefix: ", p), err) } - if err := oprot.WriteBinary(ctx, p.AadPrefix); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.aad_prefix (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:aad_prefix: ", p), err) } - } - return err + if p.IsSetAadPrefix() { + if err := oprot.WriteFieldBegin(ctx, "aad_prefix", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:aad_prefix: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.AadPrefix); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.aad_prefix (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:aad_prefix: ", p), err) + } + } + return err } func (p *AesGcmCtrV1) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAadFileUnique() { - if err := oprot.WriteFieldBegin(ctx, "aad_file_unique", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:aad_file_unique: ", p), err) } - if err := oprot.WriteBinary(ctx, p.AadFileUnique); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T.aad_file_unique (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:aad_file_unique: ", p), err) } - } - return err + if p.IsSetAadFileUnique() { + if err := oprot.WriteFieldBegin(ctx, "aad_file_unique", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:aad_file_unique: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.AadFileUnique); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.aad_file_unique (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:aad_file_unique: ", p), err) + } + } + return err } func (p *AesGcmCtrV1) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetSupplyAadPrefix() { - if err := oprot.WriteFieldBegin(ctx, "supply_aad_prefix", thrift.BOOL, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:supply_aad_prefix: ", p), err) } - if err := oprot.WriteBool(ctx, bool(*p.SupplyAadPrefix)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.supply_aad_prefix (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:supply_aad_prefix: ", p), err) } - } - return err + if p.IsSetSupplyAadPrefix() { + if err := oprot.WriteFieldBegin(ctx, "supply_aad_prefix", thrift.BOOL, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:supply_aad_prefix: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(*p.SupplyAadPrefix)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.supply_aad_prefix (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:supply_aad_prefix: ", p), err) + } + } + return err } func (p *AesGcmCtrV1) Equals(other *AesGcmCtrV1) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if bytes.Compare(p.AadPrefix, other.AadPrefix) != 0 { return false } - if bytes.Compare(p.AadFileUnique, other.AadFileUnique) != 0 { return false } - if p.SupplyAadPrefix != other.SupplyAadPrefix { - if p.SupplyAadPrefix == nil || other.SupplyAadPrefix == nil { - return false - } - if (*p.SupplyAadPrefix) != (*other.SupplyAadPrefix) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if bytes.Compare(p.AadPrefix, other.AadPrefix) != 0 { + return false + } + if bytes.Compare(p.AadFileUnique, other.AadFileUnique) != 0 { + return false + } + if p.SupplyAadPrefix != other.SupplyAadPrefix { + if p.SupplyAadPrefix == nil || other.SupplyAadPrefix == nil { + return false + } + if (*p.SupplyAadPrefix) != (*other.SupplyAadPrefix) { + return false + } + } + return true } func (p *AesGcmCtrV1) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("AesGcmCtrV1(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("AesGcmCtrV1(%+v)", *p) } func (p *AesGcmCtrV1) Validate() error { - return nil + return nil } + // Attributes: -// - AES_GCM_V1 -// - AES_GCM_CTR_V1 +// - AES_GCM_V1 +// - AES_GCM_CTR_V1 type EncryptionAlgorithm struct { - AES_GCM_V1 *AesGcmV1 `thrift:"AES_GCM_V1,1" db:"AES_GCM_V1" json:"AES_GCM_V1,omitempty"` - AES_GCM_CTR_V1 
*AesGcmCtrV1 `thrift:"AES_GCM_CTR_V1,2" db:"AES_GCM_CTR_V1" json:"AES_GCM_CTR_V1,omitempty"` + AES_GCM_V1 *AesGcmV1 `thrift:"AES_GCM_V1,1" db:"AES_GCM_V1" json:"AES_GCM_V1,omitempty"` + AES_GCM_CTR_V1 *AesGcmCtrV1 `thrift:"AES_GCM_CTR_V1,2" db:"AES_GCM_CTR_V1" json:"AES_GCM_CTR_V1,omitempty"` } func NewEncryptionAlgorithm() *EncryptionAlgorithm { - return &EncryptionAlgorithm{} + return &EncryptionAlgorithm{} } var EncryptionAlgorithm_AES_GCM_V1_DEFAULT *AesGcmV1 + func (p *EncryptionAlgorithm) GetAES_GCM_V1() *AesGcmV1 { - if !p.IsSetAES_GCM_V1() { - return EncryptionAlgorithm_AES_GCM_V1_DEFAULT - } -return p.AES_GCM_V1 + if !p.IsSetAES_GCM_V1() { + return EncryptionAlgorithm_AES_GCM_V1_DEFAULT + } + return p.AES_GCM_V1 } + var EncryptionAlgorithm_AES_GCM_CTR_V1_DEFAULT *AesGcmCtrV1 + func (p *EncryptionAlgorithm) GetAES_GCM_CTR_V1() *AesGcmCtrV1 { - if !p.IsSetAES_GCM_CTR_V1() { - return EncryptionAlgorithm_AES_GCM_CTR_V1_DEFAULT - } -return p.AES_GCM_CTR_V1 + if !p.IsSetAES_GCM_CTR_V1() { + return EncryptionAlgorithm_AES_GCM_CTR_V1_DEFAULT + } + return p.AES_GCM_CTR_V1 } func (p *EncryptionAlgorithm) CountSetFieldsEncryptionAlgorithm() int { - count := 0 - if (p.IsSetAES_GCM_V1()) { - count++ - } - if (p.IsSetAES_GCM_CTR_V1()) { - count++ - } - return count + count := 0 + if p.IsSetAES_GCM_V1() { + count++ + } + if p.IsSetAES_GCM_CTR_V1() { + count++ + } + return count } func (p *EncryptionAlgorithm) IsSetAES_GCM_V1() bool { - return p.AES_GCM_V1 != nil + return p.AES_GCM_V1 != nil } func (p *EncryptionAlgorithm) IsSetAES_GCM_CTR_V1() bool { - return p.AES_GCM_CTR_V1 != nil + return p.AES_GCM_CTR_V1 != nil } func (p *EncryptionAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *EncryptionAlgorithm) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.AES_GCM_V1 = &AesGcmV1{} - if err := p.AES_GCM_V1.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.AES_GCM_V1), err) - } - return nil -} - -func (p *EncryptionAlgorithm) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.AES_GCM_CTR_V1 = &AesGcmCtrV1{} - if err := p.AES_GCM_CTR_V1.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.AES_GCM_CTR_V1), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, 
fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *EncryptionAlgorithm) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.AES_GCM_V1 = &AesGcmV1{} + if err := p.AES_GCM_V1.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.AES_GCM_V1), err) + } + return nil +} + +func (p *EncryptionAlgorithm) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.AES_GCM_CTR_V1 = &AesGcmCtrV1{} + if err := p.AES_GCM_CTR_V1.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.AES_GCM_CTR_V1), err) + } + return nil } func (p *EncryptionAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsEncryptionAlgorithm(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "EncryptionAlgorithm"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsEncryptionAlgorithm(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "EncryptionAlgorithm"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *EncryptionAlgorithm) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAES_GCM_V1() { - if err := oprot.WriteFieldBegin(ctx, "AES_GCM_V1", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:AES_GCM_V1: ", p), err) } - if err := p.AES_GCM_V1.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.AES_GCM_V1), err) - } - if err := oprot.WriteFieldEnd(ctx); err 
!= nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:AES_GCM_V1: ", p), err) } - } - return err + if p.IsSetAES_GCM_V1() { + if err := oprot.WriteFieldBegin(ctx, "AES_GCM_V1", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:AES_GCM_V1: ", p), err) + } + if err := p.AES_GCM_V1.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.AES_GCM_V1), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:AES_GCM_V1: ", p), err) + } + } + return err } func (p *EncryptionAlgorithm) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAES_GCM_CTR_V1() { - if err := oprot.WriteFieldBegin(ctx, "AES_GCM_CTR_V1", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:AES_GCM_CTR_V1: ", p), err) } - if err := p.AES_GCM_CTR_V1.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.AES_GCM_CTR_V1), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:AES_GCM_CTR_V1: ", p), err) } - } - return err + if p.IsSetAES_GCM_CTR_V1() { + if err := oprot.WriteFieldBegin(ctx, "AES_GCM_CTR_V1", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:AES_GCM_CTR_V1: ", p), err) + } + if err := p.AES_GCM_CTR_V1.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.AES_GCM_CTR_V1), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:AES_GCM_CTR_V1: ", p), err) + } + } + return err } func (p *EncryptionAlgorithm) Equals(other *EncryptionAlgorithm) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.AES_GCM_V1.Equals(other.AES_GCM_V1) { return false } - if !p.AES_GCM_CTR_V1.Equals(other.AES_GCM_CTR_V1) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.AES_GCM_V1.Equals(other.AES_GCM_V1) { + return false + } + if !p.AES_GCM_CTR_V1.Equals(other.AES_GCM_CTR_V1) { + return false + } + return true } func (p *EncryptionAlgorithm) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("EncryptionAlgorithm(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("EncryptionAlgorithm(%+v)", *p) } func (p *EncryptionAlgorithm) Validate() error { - return nil + return nil } + // Description for file metadata -// +// // Attributes: -// - Version: Version of this file * -// - Schema: Parquet schema for this file. This schema contains metadata for all the columns. +// - Version: Version of this file * +// - Schema: Parquet schema for this file. This schema contains metadata for all the columns. +// // The schema is represented as a tree with a single root. The nodes of the tree // are flattened to a list by doing a depth-first traversal. // The column metadata contains the path in the schema for that column which can be // used to map columns to nodes in the schema. // The first element is the root * -// - NumRows: Number of rows in this file * -// - RowGroups: Row groups in this file * -// - KeyValueMetadata: Optional key/value metadata * -// - CreatedBy: String for application that wrote this file. 
This should be in the format +// - NumRows: Number of rows in this file * +// - RowGroups: Row groups in this file * +// - KeyValueMetadata: Optional key/value metadata * +// - CreatedBy: String for application that wrote this file. This should be in the format +// // version (build ). // e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) -// -// - ColumnOrders: Sort order used for the min_value and max_value fields in the Statistics +// +// - ColumnOrders: Sort order used for the min_value and max_value fields in the Statistics +// // objects and the min_values and max_values fields in the ColumnIndex // objects of each column in this file. Sort orders are listed in the order // matching the columns in the schema. The indexes are not necessary the same // though, because only leaf nodes of the schema are represented in the list // of sort orders. -// +// // Without column_orders, the meaning of the min_value and max_value fields // in the Statistics object and the ColumnIndex object is undefined. To ensure // well-defined behaviour, if these fields are written to a Parquet file, // column_orders must be written as well. -// +// // The obsolete min and max fields in the Statistics object are always sorted // by signed comparison regardless of column_orders. -// - EncryptionAlgorithm: Encryption algorithm. This field is set only in encrypted files +// - EncryptionAlgorithm: Encryption algorithm. This field is set only in encrypted files +// // with plaintext footer. Files with encrypted footer store algorithm id // in FileCryptoMetaData structure. -// - FooterSigningKeyMetadata: Retrieval metadata of key used for signing the footer. +// - FooterSigningKeyMetadata: Retrieval metadata of key used for signing the footer. +// // Used only in encrypted files with plaintext footer. 
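Before the FileMetaData struct that follows, the union semantics spelled out in the hunks above are worth making concrete. The sketch below is not part of the diff: it populates the EncryptionAlgorithm union and the optional pointer field on AesGcmV1 using the constructors and accessors generated in this file. The import path is hypothetical, since the generated package actually lives in an internal directory of the Arrow Go module.

```go
// Minimal sketch, not part of the diff: exercising the EncryptionAlgorithm
// union and the AesGcmV1 optional field defined in the generated code above.
// The import path is a placeholder; the real package is internal to Arrow.
package main

import (
	"fmt"

	parquet "example.com/gen-go/parquet" // hypothetical import path
)

func main() {
	supply := true
	gcm := parquet.NewAesGcmV1()
	// Optional Thrift fields are generated as pointers (or nilable slices),
	// so IsSetSupplyAadPrefix reports whether the field was populated.
	gcm.SupplyAadPrefix = &supply

	algo := parquet.NewEncryptionAlgorithm()
	algo.AES_GCM_V1 = gcm // a Thrift union: set exactly one member

	// Write refuses to serialize the union unless exactly one field is set;
	// CountSetFieldsEncryptionAlgorithm is the check it performs.
	if c := algo.CountSetFieldsEncryptionAlgorithm(); c != 1 {
		fmt.Printf("invalid union: %d fields set\n", c)
		return
	}
	fmt.Println(gcm.GetSupplyAadPrefix()) // true
	fmt.Println(algo)                     // EncryptionAlgorithm(...)
}
```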
type FileMetaData struct { - Version int32 `thrift:"version,1,required" db:"version" json:"version"` - Schema []*SchemaElement `thrift:"schema,2,required" db:"schema" json:"schema"` - NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` - RowGroups []*RowGroup `thrift:"row_groups,4,required" db:"row_groups" json:"row_groups"` - KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,5" db:"key_value_metadata" json:"key_value_metadata,omitempty"` - CreatedBy *string `thrift:"created_by,6" db:"created_by" json:"created_by,omitempty"` - ColumnOrders []*ColumnOrder `thrift:"column_orders,7" db:"column_orders" json:"column_orders,omitempty"` - EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,8" db:"encryption_algorithm" json:"encryption_algorithm,omitempty"` - FooterSigningKeyMetadata []byte `thrift:"footer_signing_key_metadata,9" db:"footer_signing_key_metadata" json:"footer_signing_key_metadata,omitempty"` + Version int32 `thrift:"version,1,required" db:"version" json:"version"` + Schema []*SchemaElement `thrift:"schema,2,required" db:"schema" json:"schema"` + NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` + RowGroups []*RowGroup `thrift:"row_groups,4,required" db:"row_groups" json:"row_groups"` + KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,5" db:"key_value_metadata" json:"key_value_metadata,omitempty"` + CreatedBy *string `thrift:"created_by,6" db:"created_by" json:"created_by,omitempty"` + ColumnOrders []*ColumnOrder `thrift:"column_orders,7" db:"column_orders" json:"column_orders,omitempty"` + EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,8" db:"encryption_algorithm" json:"encryption_algorithm,omitempty"` + FooterSigningKeyMetadata []byte `thrift:"footer_signing_key_metadata,9" db:"footer_signing_key_metadata" json:"footer_signing_key_metadata,omitempty"` } func NewFileMetaData() *FileMetaData { - return &FileMetaData{} + return &FileMetaData{} } - func (p *FileMetaData) GetVersion() int32 { - return p.Version + return p.Version } func (p *FileMetaData) GetSchema() []*SchemaElement { - return p.Schema + return p.Schema } func (p *FileMetaData) GetNumRows() int64 { - return p.NumRows + return p.NumRows } func (p *FileMetaData) GetRowGroups() []*RowGroup { - return p.RowGroups + return p.RowGroups } + var FileMetaData_KeyValueMetadata_DEFAULT []*KeyValue func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue { - return p.KeyValueMetadata + return p.KeyValueMetadata } + var FileMetaData_CreatedBy_DEFAULT string + func (p *FileMetaData) GetCreatedBy() string { - if !p.IsSetCreatedBy() { - return FileMetaData_CreatedBy_DEFAULT - } -return *p.CreatedBy + if !p.IsSetCreatedBy() { + return FileMetaData_CreatedBy_DEFAULT + } + return *p.CreatedBy } + var FileMetaData_ColumnOrders_DEFAULT []*ColumnOrder func (p *FileMetaData) GetColumnOrders() []*ColumnOrder { - return p.ColumnOrders + return p.ColumnOrders } + var FileMetaData_EncryptionAlgorithm_DEFAULT *EncryptionAlgorithm + func (p *FileMetaData) GetEncryptionAlgorithm() *EncryptionAlgorithm { - if !p.IsSetEncryptionAlgorithm() { - return FileMetaData_EncryptionAlgorithm_DEFAULT - } -return p.EncryptionAlgorithm + if !p.IsSetEncryptionAlgorithm() { + return FileMetaData_EncryptionAlgorithm_DEFAULT + } + return p.EncryptionAlgorithm } + var FileMetaData_FooterSigningKeyMetadata_DEFAULT []byte func (p *FileMetaData) GetFooterSigningKeyMetadata() []byte { - return p.FooterSigningKeyMetadata + return p.FooterSigningKeyMetadata } func (p 
*FileMetaData) IsSetKeyValueMetadata() bool { - return p.KeyValueMetadata != nil + return p.KeyValueMetadata != nil } func (p *FileMetaData) IsSetCreatedBy() bool { - return p.CreatedBy != nil + return p.CreatedBy != nil } func (p *FileMetaData) IsSetColumnOrders() bool { - return p.ColumnOrders != nil + return p.ColumnOrders != nil } func (p *FileMetaData) IsSetEncryptionAlgorithm() bool { - return p.EncryptionAlgorithm != nil + return p.EncryptionAlgorithm != nil } func (p *FileMetaData) IsSetFooterSigningKeyMetadata() bool { - return p.FooterSigningKeyMetadata != nil + return p.FooterSigningKeyMetadata != nil } func (p *FileMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetVersion bool = false; - var issetSchema bool = false; - var issetNumRows bool = false; - var issetRowGroups bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetVersion = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.LIST { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetSchema = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I64 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetNumRows = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.LIST { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetRowGroups = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.LIST { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.STRING { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.LIST { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 8: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 9: - if fieldTypeId == thrift.STRING { - if err := p.ReadField9(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetVersion{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Version is not set")); - } - if !issetSchema{ - return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Schema is not set")); - } - if !issetNumRows{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")); - } - if !issetRowGroups{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RowGroups is not set")); - } - return nil -} - -func (p *FileMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.Version = v -} - return nil -} - -func (p *FileMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*SchemaElement, 0, size) - p.Schema = tSlice - for i := 0; i < size; i ++ { - _elem24 := &SchemaElement{} - if err := _elem24.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem24), err) - } - p.Schema = append(p.Schema, _elem24) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.NumRows = v -} - return nil -} - -func (p *FileMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*RowGroup, 0, size) - p.RowGroups = tSlice - for i := 0; i < size; i ++ { - _elem25 := &RowGroup{} - if err := _elem25.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem25), err) - } - p.RowGroups = append(p.RowGroups, _elem25) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*KeyValue, 0, size) - p.KeyValueMetadata = tSlice - for i := 0; i < size; i ++ { - _elem26 := &KeyValue{} - if err := _elem26.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem26), err) - } - p.KeyValueMetadata = append(p.KeyValueMetadata, _elem26) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - p.CreatedBy = &v -} - return nil -} - -func (p *FileMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*ColumnOrder, 0, size) - p.ColumnOrders = tSlice - for i := 0; i < size; i ++ { - _elem27 := &ColumnOrder{} - if err := _elem27.Read(ctx, iprot); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem27), err) - } - p.ColumnOrders = append(p.ColumnOrders, _elem27) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - p.EncryptionAlgorithm = &EncryptionAlgorithm{} - if err := p.EncryptionAlgorithm.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.EncryptionAlgorithm), err) - } - return nil -} - -func (p *FileMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 9: ", err) -} else { - p.FooterSigningKeyMetadata = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetVersion bool = false + var issetSchema bool = false + var issetNumRows bool = false + var issetRowGroups bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetVersion = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.LIST { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetSchema = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I64 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetNumRows = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.LIST { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetRowGroups = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.LIST { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.STRING { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.LIST { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 9: + if fieldTypeId == thrift.STRING { + if err := p.ReadField9(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetVersion { + 
return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Version is not set")) + } + if !issetSchema { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Schema is not set")) + } + if !issetNumRows { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) + } + if !issetRowGroups { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RowGroups is not set")) + } + return nil +} + +func (p *FileMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Version = v + } + return nil +} + +func (p *FileMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*SchemaElement, 0, size) + p.Schema = tSlice + for i := 0; i < size; i++ { + _elem24 := &SchemaElement{} + if err := _elem24.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem24), err) + } + p.Schema = append(p.Schema, _elem24) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NumRows = v + } + return nil +} + +func (p *FileMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*RowGroup, 0, size) + p.RowGroups = tSlice + for i := 0; i < size; i++ { + _elem25 := &RowGroup{} + if err := _elem25.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem25), err) + } + p.RowGroups = append(p.RowGroups, _elem25) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*KeyValue, 0, size) + p.KeyValueMetadata = tSlice + for i := 0; i < size; i++ { + _elem26 := &KeyValue{} + if err := _elem26.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem26), err) + } + p.KeyValueMetadata = append(p.KeyValueMetadata, _elem26) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.CreatedBy = &v + } + return nil +} + +func (p *FileMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*ColumnOrder, 0, size) 
+ p.ColumnOrders = tSlice + for i := 0; i < size; i++ { + _elem27 := &ColumnOrder{} + if err := _elem27.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem27), err) + } + p.ColumnOrders = append(p.ColumnOrders, _elem27) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + p.EncryptionAlgorithm = &EncryptionAlgorithm{} + if err := p.EncryptionAlgorithm.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.EncryptionAlgorithm), err) + } + return nil +} + +func (p *FileMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 9: ", err) + } else { + p.FooterSigningKeyMetadata = v + } + return nil } func (p *FileMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "FileMetaData"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - if err := p.writeField9(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "FileMetaData"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + if err := p.writeField9(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *FileMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "version", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:version: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Version)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.version (1) field write error: ", p), err) 
} - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:version: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "version", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:version: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Version)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.version (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:version: ", p), err) + } + return err } func (p *FileMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "schema", thrift.LIST, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:schema: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.Schema)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.Schema { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:schema: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "schema", thrift.LIST, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:schema: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.Schema)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.Schema { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:schema: ", p), err) + } + return err } func (p *FileMetaData) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.NumRows)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.NumRows)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) + } + return err } func (p *FileMetaData) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "row_groups", thrift.LIST, 4); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 4:row_groups: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.RowGroups)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.RowGroups { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:row_groups: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "row_groups", thrift.LIST, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:row_groups: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.RowGroups)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.RowGroups { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:row_groups: ", p), err) + } + return err } func (p *FileMetaData) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetKeyValueMetadata() { - if err := oprot.WriteFieldBegin(ctx, "key_value_metadata", thrift.LIST, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:key_value_metadata: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.KeyValueMetadata)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.KeyValueMetadata { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:key_value_metadata: ", p), err) } - } - return err + if p.IsSetKeyValueMetadata() { + if err := oprot.WriteFieldBegin(ctx, "key_value_metadata", thrift.LIST, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:key_value_metadata: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.KeyValueMetadata)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.KeyValueMetadata { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:key_value_metadata: ", p), err) + } + } + return err } func (p *FileMetaData) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetCreatedBy() { - if err := oprot.WriteFieldBegin(ctx, "created_by", thrift.STRING, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:created_by: ", 
p), err) } - if err := oprot.WriteString(ctx, string(*p.CreatedBy)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.created_by (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:created_by: ", p), err) } - } - return err + if p.IsSetCreatedBy() { + if err := oprot.WriteFieldBegin(ctx, "created_by", thrift.STRING, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:created_by: ", p), err) + } + if err := oprot.WriteString(ctx, string(*p.CreatedBy)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.created_by (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:created_by: ", p), err) + } + } + return err } func (p *FileMetaData) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetColumnOrders() { - if err := oprot.WriteFieldBegin(ctx, "column_orders", thrift.LIST, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_orders: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.ColumnOrders)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.ColumnOrders { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_orders: ", p), err) } - } - return err + if p.IsSetColumnOrders() { + if err := oprot.WriteFieldBegin(ctx, "column_orders", thrift.LIST, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_orders: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.ColumnOrders)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.ColumnOrders { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_orders: ", p), err) + } + } + return err } func (p *FileMetaData) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetEncryptionAlgorithm() { - if err := oprot.WriteFieldBegin(ctx, "encryption_algorithm", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:encryption_algorithm: ", p), err) } - if err := p.EncryptionAlgorithm.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.EncryptionAlgorithm), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:encryption_algorithm: ", p), err) } - } - return err + if p.IsSetEncryptionAlgorithm() { + if err := oprot.WriteFieldBegin(ctx, "encryption_algorithm", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:encryption_algorithm: ", p), err) + } + if err := 
p.EncryptionAlgorithm.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.EncryptionAlgorithm), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:encryption_algorithm: ", p), err) + } + } + return err } func (p *FileMetaData) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetFooterSigningKeyMetadata() { - if err := oprot.WriteFieldBegin(ctx, "footer_signing_key_metadata", thrift.STRING, 9); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:footer_signing_key_metadata: ", p), err) } - if err := oprot.WriteBinary(ctx, p.FooterSigningKeyMetadata); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.footer_signing_key_metadata (9) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 9:footer_signing_key_metadata: ", p), err) } - } - return err + if p.IsSetFooterSigningKeyMetadata() { + if err := oprot.WriteFieldBegin(ctx, "footer_signing_key_metadata", thrift.STRING, 9); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:footer_signing_key_metadata: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.FooterSigningKeyMetadata); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.footer_signing_key_metadata (9) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 9:footer_signing_key_metadata: ", p), err) + } + } + return err } func (p *FileMetaData) Equals(other *FileMetaData) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Version != other.Version { return false } - if len(p.Schema) != len(other.Schema) { return false } - for i, _tgt := range p.Schema { - _src28 := other.Schema[i] - if !_tgt.Equals(_src28) { return false } - } - if p.NumRows != other.NumRows { return false } - if len(p.RowGroups) != len(other.RowGroups) { return false } - for i, _tgt := range p.RowGroups { - _src29 := other.RowGroups[i] - if !_tgt.Equals(_src29) { return false } - } - if len(p.KeyValueMetadata) != len(other.KeyValueMetadata) { return false } - for i, _tgt := range p.KeyValueMetadata { - _src30 := other.KeyValueMetadata[i] - if !_tgt.Equals(_src30) { return false } - } - if p.CreatedBy != other.CreatedBy { - if p.CreatedBy == nil || other.CreatedBy == nil { - return false - } - if (*p.CreatedBy) != (*other.CreatedBy) { return false } - } - if len(p.ColumnOrders) != len(other.ColumnOrders) { return false } - for i, _tgt := range p.ColumnOrders { - _src31 := other.ColumnOrders[i] - if !_tgt.Equals(_src31) { return false } - } - if !p.EncryptionAlgorithm.Equals(other.EncryptionAlgorithm) { return false } - if bytes.Compare(p.FooterSigningKeyMetadata, other.FooterSigningKeyMetadata) != 0 { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Version != other.Version { + return false + } + if len(p.Schema) != len(other.Schema) { + return false + } + for i, _tgt := range p.Schema { + _src28 := other.Schema[i] + if !_tgt.Equals(_src28) { + return false + } + } + if p.NumRows != other.NumRows { + return false + } + if len(p.RowGroups) != len(other.RowGroups) { + return false + } + for i, _tgt := range p.RowGroups { + _src29 := other.RowGroups[i] + if 
!_tgt.Equals(_src29) { + return false + } + } + if len(p.KeyValueMetadata) != len(other.KeyValueMetadata) { + return false + } + for i, _tgt := range p.KeyValueMetadata { + _src30 := other.KeyValueMetadata[i] + if !_tgt.Equals(_src30) { + return false + } + } + if p.CreatedBy != other.CreatedBy { + if p.CreatedBy == nil || other.CreatedBy == nil { + return false + } + if (*p.CreatedBy) != (*other.CreatedBy) { + return false + } + } + if len(p.ColumnOrders) != len(other.ColumnOrders) { + return false + } + for i, _tgt := range p.ColumnOrders { + _src31 := other.ColumnOrders[i] + if !_tgt.Equals(_src31) { + return false + } + } + if !p.EncryptionAlgorithm.Equals(other.EncryptionAlgorithm) { + return false + } + if bytes.Compare(p.FooterSigningKeyMetadata, other.FooterSigningKeyMetadata) != 0 { + return false + } + return true } func (p *FileMetaData) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("FileMetaData(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("FileMetaData(%+v)", *p) } func (p *FileMetaData) Validate() error { - return nil + return nil } + // Crypto metadata for files with encrypted footer * -// +// // Attributes: -// - EncryptionAlgorithm: Encryption algorithm. This field is only used for files +// - EncryptionAlgorithm: Encryption algorithm. This field is only used for files +// // with encrypted footer. Files with plaintext footer store algorithm id // inside footer (FileMetaData structure). -// - KeyMetadata: Retrieval metadata of key used for encryption of footer, +// - KeyMetadata: Retrieval metadata of key used for encryption of footer, +// // and (possibly) columns * type FileCryptoMetaData struct { - EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,1,required" db:"encryption_algorithm" json:"encryption_algorithm"` - KeyMetadata []byte `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"` + EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,1,required" db:"encryption_algorithm" json:"encryption_algorithm"` + KeyMetadata []byte `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"` } func NewFileCryptoMetaData() *FileCryptoMetaData { - return &FileCryptoMetaData{} + return &FileCryptoMetaData{} } var FileCryptoMetaData_EncryptionAlgorithm_DEFAULT *EncryptionAlgorithm + func (p *FileCryptoMetaData) GetEncryptionAlgorithm() *EncryptionAlgorithm { - if !p.IsSetEncryptionAlgorithm() { - return FileCryptoMetaData_EncryptionAlgorithm_DEFAULT - } -return p.EncryptionAlgorithm + if !p.IsSetEncryptionAlgorithm() { + return FileCryptoMetaData_EncryptionAlgorithm_DEFAULT + } + return p.EncryptionAlgorithm } + var FileCryptoMetaData_KeyMetadata_DEFAULT []byte func (p *FileCryptoMetaData) GetKeyMetadata() []byte { - return p.KeyMetadata + return p.KeyMetadata } func (p *FileCryptoMetaData) IsSetEncryptionAlgorithm() bool { - return p.EncryptionAlgorithm != nil + return p.EncryptionAlgorithm != nil } func (p *FileCryptoMetaData) IsSetKeyMetadata() bool { - return p.KeyMetadata != nil + return p.KeyMetadata != nil } func (p *FileCryptoMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetEncryptionAlgorithm bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId 
== thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetEncryptionAlgorithm = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRING { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetEncryptionAlgorithm{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field EncryptionAlgorithm is not set")); - } - return nil -} - -func (p *FileCryptoMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.EncryptionAlgorithm = &EncryptionAlgorithm{} - if err := p.EncryptionAlgorithm.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.EncryptionAlgorithm), err) - } - return nil -} - -func (p *FileCryptoMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.KeyMetadata = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetEncryptionAlgorithm bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetEncryptionAlgorithm = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRING { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetEncryptionAlgorithm { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field EncryptionAlgorithm is not set")) + } + return nil +} + +func (p *FileCryptoMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.EncryptionAlgorithm = &EncryptionAlgorithm{} + if err := p.EncryptionAlgorithm.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.EncryptionAlgorithm), err) + } + return nil +} + +func (p *FileCryptoMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.KeyMetadata = v + } + return nil } func (p *FileCryptoMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := 
oprot.WriteStructBegin(ctx, "FileCryptoMetaData"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "FileCryptoMetaData"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *FileCryptoMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "encryption_algorithm", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:encryption_algorithm: ", p), err) } - if err := p.EncryptionAlgorithm.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.EncryptionAlgorithm), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:encryption_algorithm: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "encryption_algorithm", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:encryption_algorithm: ", p), err) + } + if err := p.EncryptionAlgorithm.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.EncryptionAlgorithm), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:encryption_algorithm: ", p), err) + } + return err } func (p *FileCryptoMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetKeyMetadata() { - if err := oprot.WriteFieldBegin(ctx, "key_metadata", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:key_metadata: ", p), err) } - if err := oprot.WriteBinary(ctx, p.KeyMetadata); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.key_metadata (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:key_metadata: ", p), err) } - } - return err + if p.IsSetKeyMetadata() { + if err := oprot.WriteFieldBegin(ctx, "key_metadata", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:key_metadata: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.KeyMetadata); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.key_metadata (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:key_metadata: ", p), err) + } + } + return err } func (p *FileCryptoMetaData) Equals(other *FileCryptoMetaData) 
bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.EncryptionAlgorithm.Equals(other.EncryptionAlgorithm) { return false } - if bytes.Compare(p.KeyMetadata, other.KeyMetadata) != 0 { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.EncryptionAlgorithm.Equals(other.EncryptionAlgorithm) { + return false + } + if bytes.Compare(p.KeyMetadata, other.KeyMetadata) != 0 { + return false + } + return true } func (p *FileCryptoMetaData) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("FileCryptoMetaData(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("FileCryptoMetaData(%+v)", *p) } func (p *FileCryptoMetaData) Validate() error { - return nil + return nil } diff --git a/go/parquet/internal/utils/bit_packing_avx2_amd64.go b/go/parquet/internal/utils/bit_packing_avx2_amd64.go index ee01f002b5ece..0455ccc505bfe 100644 --- a/go/parquet/internal/utils/bit_packing_avx2_amd64.go +++ b/go/parquet/internal/utils/bit_packing_avx2_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package utils diff --git a/go/parquet/internal/utils/bit_packing_neon_arm64.go b/go/parquet/internal/utils/bit_packing_neon_arm64.go index 8d09c891155ef..09154e3e4b7dd 100755 --- a/go/parquet/internal/utils/bit_packing_neon_arm64.go +++ b/go/parquet/internal/utils/bit_packing_neon_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package utils diff --git a/go/parquet/internal/utils/unpack_bool_amd64.go b/go/parquet/internal/utils/unpack_bool_amd64.go index 1e9680db4b21a..2b2054f3b00b8 100644 --- a/go/parquet/internal/utils/unpack_bool_amd64.go +++ b/go/parquet/internal/utils/unpack_bool_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package utils diff --git a/go/parquet/internal/utils/unpack_bool_arm64.go b/go/parquet/internal/utils/unpack_bool_arm64.go index 2c3b19eca458b..879ffd3c9540d 100644 --- a/go/parquet/internal/utils/unpack_bool_arm64.go +++ b/go/parquet/internal/utils/unpack_bool_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package utils @@ -21,13 +22,14 @@ package utils import ( "os" "strings" + + "golang.org/x/sys/cpu" ) -import "golang.org/x/sys/cpu" var byteToBoolFunc func([]byte, []bool) func init() { - // Added ability to enable extension via environment: + // Added ability to enable extension via environment: // ARM_ENABLE_EXT=NEON go test if ext, ok := os.LookupEnv("ARM_ENABLE_EXT"); ok { exts := strings.Split(ext, ",") diff --git a/go/parquet/internal/utils/unpack_bool_avx2_amd64.go b/go/parquet/internal/utils/unpack_bool_avx2_amd64.go index e0065e5aad16d..cec772a2ccf97 100644 --- a/go/parquet/internal/utils/unpack_bool_avx2_amd64.go +++ b/go/parquet/internal/utils/unpack_bool_avx2_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build !noasm // +build !noasm package utils diff --git a/go/parquet/internal/utils/unpack_bool_neon_arm64.go b/go/parquet/internal/utils/unpack_bool_neon_arm64.go index 2e9808abbf157..ed46ce29e0309 100755 --- a/go/parquet/internal/utils/unpack_bool_neon_arm64.go +++ b/go/parquet/internal/utils/unpack_bool_neon_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package utils diff --git a/go/parquet/internal/utils/unpack_bool_noasm.go b/go/parquet/internal/utils/unpack_bool_noasm.go index a715366c6418d..eba20fa9c0f56 100644 --- a/go/parquet/internal/utils/unpack_bool_noasm.go +++ b/go/parquet/internal/utils/unpack_bool_noasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build noasm // +build noasm package utils diff --git a/go/parquet/internal/utils/unpack_bool_sse4_amd64.go b/go/parquet/internal/utils/unpack_bool_sse4_amd64.go index 85e4aa77df73b..d00c37474e61c 100644 --- a/go/parquet/internal/utils/unpack_bool_sse4_amd64.go +++ b/go/parquet/internal/utils/unpack_bool_sse4_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package utils diff --git a/go/parquet/metadata/app_version.go b/go/parquet/metadata/app_version.go index f61c4c9703f78..fa54aec347575 100644 --- a/go/parquet/metadata/app_version.go +++ b/go/parquet/metadata/app_version.go @@ -74,7 +74,8 @@ func NewAppVersionExplicit(app string, major, minor, patch int) *AppVersion { // NewAppVersion parses a "created by" string such as "parquet-go 1.0.0". // // It also supports handling pre-releases and build info such as -// parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd) +// +// parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd) func NewAppVersion(createdby string) *AppVersion { v := &AppVersion{} diff --git a/go/parquet/pqarrow/file_writer_test.go b/go/parquet/pqarrow/file_writer_test.go index 425e4479f6d5c..fc965279a928d 100644 --- a/go/parquet/pqarrow/file_writer_test.go +++ b/go/parquet/pqarrow/file_writer_test.go @@ -18,6 +18,7 @@ package pqarrow_test import ( "bytes" + "math" "strings" "testing" @@ -87,3 +88,44 @@ func TestFileWriterNumRows(t *testing.T) { require.NoError(t, writer.Close()) assert.Equal(t, 4, writer.NumRows()) } + +func TestFileWriterBuffered(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + {Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + }, nil) + + data := `[ + {"one": 1, "two": 2}, + {"one": 1, "two": null}, + {"one": null, "two": 2}, + {"one": null, "two": null} + ]` + + alloc := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer alloc.AssertSize(t, 0) + + record, _, err := array.RecordFromJSON(alloc, schema, strings.NewReader(data)) + require.NoError(t, err) + defer record.Release() + + output := &bytes.Buffer{} + writer, err := pqarrow.NewFileWriter( + schema, + output, + parquet.NewWriterProperties( + parquet.WithAllocator(alloc), + // Ensure enough space so we can close the writer with rows still buffered + parquet.WithMaxRowGroupLength(math.MaxInt64), + ), + pqarrow.NewArrowWriterProperties( + pqarrow.WithAllocator(alloc), + ), + ) + require.NoError(t, err) + + require.NoError(t, writer.WriteBuffered(record)) + + require.NoError(t, writer.Close()) + assert.Equal(t, 4, 
writer.NumRows()) +} diff --git a/go/parquet/schema/reflection.go b/go/parquet/schema/reflection.go index 1ec9c72f3dfc1..f961c6ef26d08 100644 --- a/go/parquet/schema/reflection.go +++ b/go/parquet/schema/reflection.go @@ -551,7 +551,7 @@ func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info // NewSchemaFromStruct generates a schema from an object type via reflection of // the type and reading struct tags for "parquet". // -// Rules +// # Rules // // Everything defaults to Required repetition, unless otherwise specified. // Pointer types become Optional repetition. @@ -571,7 +571,7 @@ func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info // // maps will become appropriate Map structures in the schema of the defined key and values. // -// Available Tags +// # Available Tags // // name: by default the node will have the same name as the field, this tag lets you specify a name // diff --git a/go/parquet/tools.go b/go/parquet/tools.go index b9ce84def5ae0..64e9419e4f711 100644 --- a/go/parquet/tools.go +++ b/go/parquet/tools.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build tools // +build tools package tools diff --git a/.mvn/gradle-enterprise.xml b/java/.mvn/develocity.xml similarity index 78% rename from .mvn/gradle-enterprise.xml rename to java/.mvn/develocity.xml index bae5a3f147e68..df3cbccd2b6cb 100644 --- a/.mvn/gradle-enterprise.xml +++ b/java/.mvn/develocity.xml @@ -1,4 +1,4 @@ - + - + https://ge.apache.org false - true + true true true #{isFalse(env['CI'])} - ALWAYS + true true #{{'0.0.0.0'}} @@ -42,4 +42,4 @@ false - + diff --git a/.mvn/extensions.xml b/java/.mvn/extensions.xml similarity index 90% rename from .mvn/extensions.xml rename to java/.mvn/extensions.xml index b446c647e47e6..b56ab0fd7772a 100644 --- a/.mvn/extensions.xml +++ b/java/.mvn/extensions.xml @@ -22,12 +22,12 @@ com.gradle - gradle-enterprise-maven-extension - 1.20 + develocity-maven-extension + 1.21.4 com.gradle common-custom-user-data-maven-extension - 1.12.5 + 2.0 diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 9ddc150253874..0af1641aa1041 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -24,37 +24,33 @@ (Contrib/Experimental) A library for converting Avro data to Arrow data. http://maven.apache.org - + + dev/checkstyle/checkstyle-spotless.xml + none + - + org.apache.arrow arrow-memory-core - - org.apache.arrow arrow-memory-netty runtime - - org.apache.arrow arrow-vector - org.immutables - value + value-annotations - org.apache.avro avro ${dep.avro.version} -
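The reflection.go hunk above only retitles the godoc sections ("Rules", "Available Tags") with the "#" heading syntax that gofmt expects for doc comments since Go 1.19; the documented rules are unchanged. As a quick illustration of those rules, here is a sketch, not part of the patch: the Record type is invented for the example, and the import path's major-version suffix is an assumption that varies by release.

package main

import (
	"fmt"

	// Assumed import path; the version suffix differs across releases.
	"github.com/apache/arrow/go/v16/parquet/schema"
)

// Illustrative type: value fields default to Required repetition,
// pointer fields become Optional, and maps become Parquet Map nodes.
// The "name" tag overrides the default field name.
type Record struct {
	ID   int64            `parquet:"name=id"`
	Note *string          // Optional, because it is a pointer
	Tags map[string]int32 // becomes a Map node
}

func main() {
	sc, err := schema.NewSchemaFromStruct(Record{})
	if err != nil {
		panic(err)
	}
	fmt.Println(sc.NumColumns()) // number of leaf columns in the derived schema
}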
diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrow.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrow.java index 8baa60a72ddc3..2392c36f94cee 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrow.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrow.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import java.io.IOException; - import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.avro.Schema; import org.apache.avro.io.Decoder; -/** - * Utility class to convert Avro objects to columnar Arrow format objects. - */ +/** Utility class to convert Avro objects to columnar Arrow format objects. */ public class AvroToArrow { /** - * Fetch the data from {@link Decoder} and convert it to Arrow objects. - * Only for testing purpose. + * Fetch the data from {@link Decoder} and convert it to Arrow objects. Only for testing purposes. + * * @param schema avro schema. * @param decoder avro decoder * @param config configuration of the conversion. @@ -48,15 +44,14 @@ static VectorSchemaRoot avroToArrow(Schema schema, Decoder decoder, AvroToArrowC /** * Fetch the data from {@link Decoder} and iteratively convert it to Arrow objects. + * * @param schema avro schema * @param decoder avro decoder * @param config configuration of the conversion. * @throws IOException on error */ public static AvroToArrowVectorIterator avroToArrowIterator( - Schema schema, - Decoder decoder, - AvroToArrowConfig config) throws IOException { + Schema schema, Decoder decoder, AvroToArrowConfig config) throws IOException { Preconditions.checkNotNull(schema, "Avro schema object cannot be null"); Preconditions.checkNotNull(decoder, "Avro decoder object cannot be null"); diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfig.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfig.java index f9210fb012523..290d1a77d956c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfig.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfig.java @@ -14,40 +14,35 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import java.util.Set; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.dictionary.DictionaryProvider; -/** - * This class configures the Avro-to-Arrow conversion process. - */ +/** This class configures the Avro-to-Arrow conversion process. */ public class AvroToArrowConfig { private final BufferAllocator allocator; /** - * The maximum rowCount to read each time when partially convert data. - * Default value is 1024 and -1 means read all data into one vector. + * The maximum rowCount to read each time when partially converting data. Default value is 1024 and + * -1 means read all data into one vector. */ private final int targetBatchSize; /** - * The dictionary provider used for enum type. - * If avro schema has enum type, will create dictionary and update this provider. + * The dictionary provider used for enum type. If the avro schema has an enum type, a + * dictionary will be created and this provider updated.
*/ private final DictionaryProvider.MapDictionaryProvider provider; - /** - * The field names which to skip when reading decoder values. - */ + /** The field names which to skip when reading decoder values. */ private final Set skipFieldNames; /** * Instantiate an instance. + * * @param allocator The memory allocator to construct the Arrow vectors with. * @param targetBatchSize The maximum rowCount to read each time when partially convert data. * @param provider The dictionary provider used for enum type, adapter will update this provider. @@ -59,8 +54,10 @@ public class AvroToArrowConfig { DictionaryProvider.MapDictionaryProvider provider, Set skipFieldNames) { - Preconditions.checkArgument(targetBatchSize == AvroToArrowVectorIterator.NO_LIMIT_BATCH_SIZE || - targetBatchSize > 0, "invalid targetBatchSize: %s", targetBatchSize); + Preconditions.checkArgument( + targetBatchSize == AvroToArrowVectorIterator.NO_LIMIT_BATCH_SIZE || targetBatchSize > 0, + "invalid targetBatchSize: %s", + targetBatchSize); this.allocator = allocator; this.targetBatchSize = targetBatchSize; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfigBuilder.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfigBuilder.java index 41e486d0a1ce0..1fa176a7fea38 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfigBuilder.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfigBuilder.java @@ -14,18 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import java.util.HashSet; import java.util.Set; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.dictionary.DictionaryProvider; -/** - * This class builds {@link AvroToArrowConfig}s. - */ +/** This class builds {@link AvroToArrowConfig}s. */ public class AvroToArrowConfigBuilder { private BufferAllocator allocator; @@ -36,9 +32,7 @@ public class AvroToArrowConfigBuilder { private Set skipFieldNames; - /** - * Default constructor for the {@link AvroToArrowConfigBuilder}. - */ + /** Default constructor for the {@link AvroToArrowConfigBuilder}. */ public AvroToArrowConfigBuilder(BufferAllocator allocator) { this.allocator = allocator; this.targetBatchSize = AvroToArrowVectorIterator.DEFAULT_BATCH_SIZE; @@ -61,14 +55,8 @@ public AvroToArrowConfigBuilder setSkipFieldNames(Set skipFieldNames) { return this; } - /** - * This builds the {@link AvroToArrowConfig} from the provided params. - */ + /** This builds the {@link AvroToArrowConfig} from the provided params. */ public AvroToArrowConfig build() { - return new AvroToArrowConfig( - allocator, - targetBatchSize, - provider, - skipFieldNames); + return new AvroToArrowConfig(allocator, targetBatchSize, provider, skipFieldNames); } } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java index 1f5ad9e768950..b39121cfd1ae7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
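[Editor's aside: the builder reformatted above is the intended way to assemble an AvroToArrowConfig. A sketch of the non-default knobs follows; the skipped field name is hypothetical, and setTargetBatchSize is assumed to be part of the builder's API even though these hunks only touch setSkipFieldNames and build().]

import java.util.Collections;
import org.apache.arrow.adapter.avro.AvroToArrowConfig;
import org.apache.arrow.adapter.avro.AvroToArrowConfigBuilder;
import org.apache.arrow.memory.BufferAllocator;

public final class ConfigSketch {
  // Partial conversion in 4096-row batches, skipping a hypothetical "audit" field.
  // Passing -1 (AvroToArrowVectorIterator.NO_LIMIT_BATCH_SIZE) would instead read
  // all rows into a single batch, per the AvroToArrowConfig javadoc above.
  public static AvroToArrowConfig batched(BufferAllocator allocator) {
    return new AvroToArrowConfigBuilder(allocator)
        .setTargetBatchSize(4096)
        .setSkipFieldNames(Collections.singleton("audit"))
        .build();
  }
}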
*/ - package org.apache.arrow.adapter.avro; import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; @@ -27,10 +26,10 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; - import org.apache.arrow.adapter.avro.consumers.AvroArraysConsumer; import org.apache.arrow.adapter.avro.consumers.AvroBooleanConsumer; import org.apache.arrow.adapter.avro.consumers.AvroBytesConsumer; @@ -95,7 +94,6 @@ import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; -import org.apache.avro.Schema.Type; import org.apache.avro.io.Decoder; /** @@ -106,36 +104,37 @@ public class AvroToArrowUtils { /** * Creates a {@link Consumer} from the {@link Schema} * -

* <p>This method currently performs following type mapping for Avro data types to corresponding Arrow data types.
+ * <p>This method currently performs following type mapping for Avro data types to corresponding
+ * Arrow data types.
 *
 * <ul>
- *   <li>STRING --> ArrowType.Utf8</li>
- *   <li>INT --> ArrowType.Int(32, signed)</li>
- *   <li>LONG --> ArrowType.Int(64, signed)</li>
- *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
- *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)</li>
- *   <li>BOOLEAN --> ArrowType.Bool</li>
- *   <li>BYTES --> ArrowType.Binary</li>
- *   <li>ARRAY --> ArrowType.List</li>
- *   <li>MAP --> ArrowType.Map</li>
- *   <li>FIXED --> ArrowType.FixedSizeBinary</li>
- *   <li>RECORD --> ArrowType.Struct</li>
- *   <li>UNION --> ArrowType.Union</li>
- *   <li>ENUM--> ArrowType.Int</li>
- *   <li>DECIMAL --> ArrowType.Decimal</li>
- *   <li>Date --> ArrowType.Date(DateUnit.DAY)</li>
- *   <li>TimeMillis --> ArrowType.Time(TimeUnit.MILLISECOND, 32)</li>
- *   <li>TimeMicros --> ArrowType.Time(TimeUnit.MICROSECOND, 64)</li>
- *   <li>TimestampMillis --> ArrowType.Timestamp(TimeUnit.MILLISECOND, null)</li>
- *   <li>TimestampMicros --> ArrowType.Timestamp(TimeUnit.MICROSECOND, null)</li>
+ *   <li>STRING --> ArrowType.Utf8
+ *   <li>INT --> ArrowType.Int(32, signed)
+ *   <li>LONG --> ArrowType.Int(64, signed)
+ *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
+ *   <li>BOOLEAN --> ArrowType.Bool
+ *   <li>BYTES --> ArrowType.Binary
+ *   <li>ARRAY --> ArrowType.List
+ *   <li>MAP --> ArrowType.Map
+ *   <li>FIXED --> ArrowType.FixedSizeBinary
+ *   <li>RECORD --> ArrowType.Struct
+ *   <li>UNION --> ArrowType.Union
+ *   <li>ENUM--> ArrowType.Int
+ *   <li>DECIMAL --> ArrowType.Decimal
+ *   <li>Date --> ArrowType.Date(DateUnit.DAY)
+ *   <li>TimeMillis --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
+ *   <li>TimeMicros --> ArrowType.Time(TimeUnit.MICROSECOND, 64)
+ *   <li>TimestampMillis --> ArrowType.Timestamp(TimeUnit.MILLISECOND, null)
+ *   <li>TimestampMicros --> ArrowType.Timestamp(TimeUnit.MICROSECOND, null)
 * </ul>
*/ - private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config) { return createConsumer(schema, name, false, config, null); } - private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config, FieldVector vector) { + private static Consumer createConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector vector) { return createConsumer(schema, name, false, config, vector); } @@ -144,7 +143,8 @@ private static Consumer createConsumer(Schema schema, String name, AvroToArrowCo * * @param schema avro schema * @param name arrow field name - * @param consumerVector vector to keep in consumer, if v == null, will create a new vector via field. + * @param consumerVector vector to keep in consumer, if v == null, will create a new vector via + * field. * @return consumer */ private static Consumer createConsumer( @@ -159,7 +159,7 @@ private static Consumer createConsumer( final BufferAllocator allocator = config.getAllocator(); - final Type type = schema.getType(); + final Schema.Type type = schema.getType(); final LogicalType logicalType = schema.getLogicalType(); final ArrowType arrowType; @@ -185,7 +185,7 @@ private static Consumer createConsumer( break; case STRING: arrowType = new ArrowType.Utf8(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroStringConsumer((VarCharVector) vector); break; @@ -193,12 +193,18 @@ private static Consumer createConsumer( Map extProps = createExternalProps(schema); if (logicalType instanceof LogicalTypes.Decimal) { arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema, extProps)); + fieldType = + new FieldType( + nullable, arrowType, /*dictionary=*/ null, getMetaData(schema, extProps)); vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroDecimalConsumer.FixedDecimalConsumer((DecimalVector) vector, schema.getFixedSize()); + consumer = + new AvroDecimalConsumer.FixedDecimalConsumer( + (DecimalVector) vector, schema.getFixedSize()); } else { arrowType = new ArrowType.FixedSizeBinary(schema.getFixedSize()); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema, extProps)); + fieldType = + new FieldType( + nullable, arrowType, /*dictionary=*/ null, getMetaData(schema, extProps)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroFixedConsumer((FixedSizeBinaryVector) vector, schema.getFixedSize()); } @@ -206,84 +212,85 @@ private static Consumer createConsumer( case INT: if (logicalType instanceof LogicalTypes.Date) { arrowType = new ArrowType.Date(DateUnit.DAY); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroDateConsumer((DateDayVector) vector); } else if (logicalType instanceof LogicalTypes.TimeMillis) { arrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, 
getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimeMillisConsumer((TimeMilliVector) vector); } else { - arrowType = new ArrowType.Int(32, /*signed=*/true); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + arrowType = new ArrowType.Int(32, /*isSigned=*/ true); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroIntConsumer((IntVector) vector); } break; case BOOLEAN: arrowType = new ArrowType.Bool(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroBooleanConsumer((BitVector) vector); break; case LONG: if (logicalType instanceof LogicalTypes.TimeMicros) { arrowType = new ArrowType.Time(TimeUnit.MICROSECOND, 64); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimeMicroConsumer((TimeMicroVector) vector); } else if (logicalType instanceof LogicalTypes.TimestampMillis) { arrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimestampMillisConsumer((TimeStampMilliVector) vector); } else if (logicalType instanceof LogicalTypes.TimestampMicros) { arrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimestampMicrosConsumer((TimeStampMicroVector) vector); } else { - arrowType = new ArrowType.Int(64, /*signed=*/true); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + arrowType = new ArrowType.Int(64, /*isSigned=*/ true); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroLongConsumer((BigIntVector) vector); } break; case FLOAT: arrowType = new ArrowType.FloatingPoint(SINGLE); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroFloatConsumer((Float4Vector) vector); break; case DOUBLE: arrowType = new ArrowType.FloatingPoint(DOUBLE); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroDoubleConsumer((Float8Vector) vector); break; case BYTES: if (logicalType instanceof 
LogicalTypes.Decimal) { arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroDecimalConsumer.BytesDecimalConsumer((DecimalVector) vector); } else { arrowType = new ArrowType.Binary(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroBytesConsumer((VarBinaryVector) vector); } break; case NULL: arrowType = new ArrowType.Null(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); - vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallback=*/null); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); + vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallBack=*/ null); consumer = new AvroNullConsumer((NullVector) vector); break; default: // no-op, shouldn't get here - throw new UnsupportedOperationException("Can't convert avro type %s to arrow type." + type.getName()); + throw new UnsupportedOperationException( + "Can't convert avro type %s to arrow type." + type.getName()); } return consumer; } @@ -291,59 +298,65 @@ private static Consumer createConsumer( private static ArrowType createDecimalArrowType(LogicalTypes.Decimal logicalType) { final int scale = logicalType.getScale(); final int precision = logicalType.getPrecision(); - Preconditions.checkArgument(precision > 0 && precision <= 38, - "Precision must be in range of 1 to 38"); - Preconditions.checkArgument(scale >= 0 && scale <= 38, - "Scale must be in range of 0 to 38."); - Preconditions.checkArgument(scale <= precision, - "Invalid decimal scale: %s (greater than precision: %s)", scale, precision); + Preconditions.checkArgument( + precision > 0 && precision <= 38, "Precision must be in range of 1 to 38"); + Preconditions.checkArgument(scale >= 0 && scale <= 38, "Scale must be in range of 0 to 38."); + Preconditions.checkArgument( + scale <= precision, + "Invalid decimal scale: %s (greater than precision: %s)", + scale, + precision); return new ArrowType.Decimal(precision, scale, 128); - } private static Consumer createSkipConsumer(Schema schema) { SkipFunction skipFunction; - Type type = schema.getType(); + Schema.Type type = schema.getType(); switch (type) { case UNION: - List unionDelegates = schema.getTypes().stream().map(s -> - createSkipConsumer(s)).collect(Collectors.toList()); + List unionDelegates = + schema.getTypes().stream().map(s -> createSkipConsumer(s)).collect(Collectors.toList()); skipFunction = decoder -> unionDelegates.get(decoder.readInt()).consume(decoder); break; case ARRAY: Consumer elementDelegate = createSkipConsumer(schema.getElementType()); - skipFunction = decoder -> { - for (long i = decoder.skipArray(); i != 0; i = decoder.skipArray()) { - for (long j = 0; j < i; j++) { - elementDelegate.consume(decoder); - } - } - }; + skipFunction = + decoder -> { + for (long i = decoder.skipArray(); i != 0; i = decoder.skipArray()) { + for (long j = 0; j < i; j++) { + elementDelegate.consume(decoder); + } + } + }; break; case MAP: Consumer valueDelegate = createSkipConsumer(schema.getValueType()); - 
skipFunction = decoder -> { - for (long i = decoder.skipMap(); i != 0; i = decoder.skipMap()) { - for (long j = 0; j < i; j++) { - decoder.skipString(); // Discard key - valueDelegate.consume(decoder); - } - } - }; + skipFunction = + decoder -> { + for (long i = decoder.skipMap(); i != 0; i = decoder.skipMap()) { + for (long j = 0; j < i; j++) { + decoder.skipString(); // Discard key + valueDelegate.consume(decoder); + } + } + }; break; case RECORD: - List delegates = schema.getFields().stream().map(field -> - createSkipConsumer(field.schema())).collect(Collectors.toList()); + List delegates = + schema.getFields().stream() + .map(field -> createSkipConsumer(field.schema())) + .collect(Collectors.toList()); - skipFunction = decoder -> { - for (Consumer consumer : delegates) { - consumer.consume(decoder); - } - }; + skipFunction = + decoder -> { + for (Consumer consumer : delegates) { + consumer.consume(decoder); + } + }; break; case ENUM: @@ -374,7 +387,7 @@ private static Consumer createSkipConsumer(Schema schema) { skipFunction = decoder -> decoder.skipBytes(); break; case NULL: - skipFunction = decoder -> { }; + skipFunction = decoder -> {}; break; default: // no-op, shouldn't get here @@ -384,14 +397,13 @@ private static Consumer createSkipConsumer(Schema schema) { return new SkipConsumer(skipFunction); } - static CompositeAvroConsumer createCompositeConsumer( - Schema schema, AvroToArrowConfig config) { + static CompositeAvroConsumer createCompositeConsumer(Schema schema, AvroToArrowConfig config) { List consumers = new ArrayList<>(); final Set skipFieldNames = config.getSkipFieldNames(); Schema.Type type = schema.getType(); - if (type == Type.RECORD) { + if (type == Schema.Type.RECORD) { for (Schema.Field field : schema.getFields()) { if (skipFieldNames.contains(field.name())) { consumers.add(createSkipConsumer(field.schema())); @@ -399,7 +411,6 @@ static CompositeAvroConsumer createCompositeConsumer( Consumer consumer = createConsumer(field.schema(), field.name(), config); consumers.add(consumer); } - } } else { Consumer consumer = createConsumer(schema, "", config); @@ -409,14 +420,16 @@ static CompositeAvroConsumer createCompositeConsumer( return new CompositeAvroConsumer(consumers); } - private static FieldVector createVector(FieldVector consumerVector, FieldType fieldType, - String name, BufferAllocator allocator) { - return consumerVector != null ? consumerVector : fieldType.createNewSingleVector(name, allocator, null); + private static FieldVector createVector( + FieldVector consumerVector, FieldType fieldType, String name, BufferAllocator allocator) { + return consumerVector != null + ? 
consumerVector + : fieldType.createNewSingleVector(name, allocator, null); } private static String getDefaultFieldName(ArrowType type) { Types.MinorType minorType = Types.getMinorTypeForArrowType(type); - return minorType.name().toLowerCase(); + return minorType.name().toLowerCase(Locale.ROOT); } private static Field avroSchemaToField(Schema schema, String name, AvroToArrowConfig config) { @@ -424,12 +437,9 @@ private static Field avroSchemaToField(Schema schema, String name, AvroToArrowCo } private static Field avroSchemaToField( - Schema schema, - String name, - AvroToArrowConfig config, - Map externalProps) { + Schema schema, String name, AvroToArrowConfig config, Map externalProps) { - final Type type = schema.getType(); + final Schema.Type type = schema.getType(); final LogicalType logicalType = schema.getLogicalType(); final List children = new ArrayList<>(); final FieldType fieldType; @@ -441,7 +451,8 @@ private static Field avroSchemaToField( // Union child vector should use default name children.add(avroSchemaToField(childSchema, null, config)); } - fieldType = createFieldType(new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps); + fieldType = + createFieldType(new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps); break; case ARRAY: Schema elementSchema = schema.getElementType(); @@ -450,14 +461,18 @@ private static Field avroSchemaToField( break; case MAP: // MapVector internal struct field and key field should be non-nullable - FieldType keyFieldType = new FieldType(/*nullable=*/false, new ArrowType.Utf8(), /*dictionary=*/null); - Field keyField = new Field("key", keyFieldType, /*children=*/null); + FieldType keyFieldType = + new FieldType(/*nullable=*/ false, new ArrowType.Utf8(), /*dictionary=*/ null); + Field keyField = new Field("key", keyFieldType, /*children=*/ null); Field valueField = avroSchemaToField(schema.getValueType(), "value", config); - FieldType structFieldType = new FieldType(false, new ArrowType.Struct(), /*dictionary=*/null); - Field structField = new Field("internal", structFieldType, Arrays.asList(keyField, valueField)); + FieldType structFieldType = + new FieldType(false, new ArrowType.Struct(), /*dictionary=*/ null); + Field structField = + new Field("internal", structFieldType, Arrays.asList(keyField, valueField)); children.add(structField); - fieldType = createFieldType(new ArrowType.Map(/*keySorted=*/false), schema, externalProps); + fieldType = + createFieldType(new ArrowType.Map(/*keysSorted=*/ false), schema, externalProps); break; case RECORD: final Set skipFieldNames = config.getSkipFieldNames(); @@ -486,8 +501,12 @@ private static Field avroSchemaToField( int enumCount = schema.getEnumSymbols().size(); ArrowType.Int indexType = DictionaryEncoder.getIndexType(enumCount); - fieldType = createFieldType(indexType, schema, externalProps, - new DictionaryEncoding(current, /*ordered=*/false, /*indexType=*/indexType)); + fieldType = + createFieldType( + indexType, + schema, + externalProps, + new DictionaryEncoding(current, /*ordered=*/ false, /*indexType=*/ indexType)); break; case STRING: @@ -509,7 +528,7 @@ private static Field avroSchemaToField( } else if (logicalType instanceof LogicalTypes.TimeMillis) { intArrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); } else { - intArrowType = new ArrowType.Int(32, /*signed=*/true); + intArrowType = new ArrowType.Int(32, /*isSigned=*/ true); } fieldType = createFieldType(intArrowType, schema, externalProps); break; @@ -525,7 +544,7 @@ private static Field avroSchemaToField( 
} else if (logicalType instanceof LogicalTypes.TimestampMicros) { longArrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); } else { - longArrowType = new ArrowType.Int(64, /*signed=*/true); + longArrowType = new ArrowType.Int(64, /*isSigned=*/ true); } fieldType = createFieldType(longArrowType, schema, externalProps); break; @@ -558,8 +577,8 @@ private static Field avroSchemaToField( return new Field(name, fieldType, children.size() == 0 ? null : children); } - private static Consumer createArrayConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createArrayConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { ListVector listVector; if (consumerVector == null) { @@ -578,8 +597,8 @@ private static Consumer createArrayConsumer(Schema schema, String name, AvroToAr return new AvroArraysConsumer(listVector, delegate); } - private static Consumer createStructConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createStructConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { final Set skipFieldNames = config.getSkipFieldNames(); @@ -601,19 +620,22 @@ private static Consumer createStructConsumer(Schema schema, String name, AvroToA if (skipFieldNames.contains(fullChildName)) { delegate = createSkipConsumer(childField.schema()); } else { - delegate = createConsumer(childField.schema(), fullChildName, config, - structVector.getChildrenFromFields().get(vectorIndex++)); + delegate = + createConsumer( + childField.schema(), + fullChildName, + config, + structVector.getChildrenFromFields().get(vectorIndex++)); } delegates[i] = delegate; } return new AvroStructConsumer(structVector, delegates); - } - private static Consumer createEnumConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createEnumConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { BaseIntVector indexVector; if (consumerVector == null) { @@ -630,16 +652,14 @@ private static Consumer createEnumConsumer(Schema schema, String name, AvroToArr for (int i = 0; i < valueCount; i++) { dictVector.set(i, schema.getEnumSymbols().get(i).getBytes(StandardCharsets.UTF_8)); } - Dictionary dictionary = - new Dictionary(dictVector, indexVector.getField().getDictionary()); + Dictionary dictionary = new Dictionary(dictVector, indexVector.getField().getDictionary()); config.getProvider().put(dictionary); return new AvroEnumConsumer(indexVector); - } - private static Consumer createMapConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createMapConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { MapVector mapVector; if (consumerVector == null) { @@ -653,10 +673,14 @@ private static Consumer createMapConsumer(Schema schema, String name, AvroToArro StructVector structVector = (StructVector) mapVector.getDataVector(); // keys in avro map are always assumed to be strings. 
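[Editor's aside on the MAP mapping used in the hunks above: an Avro map<string, long> becomes an Arrow Map whose entries are a non-nullable "internal" struct holding a non-nullable Utf8 "key" and a "value" child. The sketch below mirrors the field tree avroSchemaToField's MAP branch builds; the child names come from the code above, while the method and outer field names are illustrative.]

import java.util.Arrays;
import java.util.Collections;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

public final class MapFieldSketch {
  // map (keysSorted=false)
  //   internal: non-nullable struct
  //     key:   non-nullable Utf8 (avro map keys are always assumed to be strings)
  //     value: Int(64, signed), non-nullable here since createFieldType passes nullable=false
  public static Field avroMapOfLong(String name) {
    Field key =
        new Field(
            "key",
            new FieldType(/*nullable=*/ false, new ArrowType.Utf8(), /*dictionary=*/ null),
            null);
    Field value =
        new Field(
            "value",
            new FieldType(
                /*nullable=*/ false, new ArrowType.Int(64, /*isSigned=*/ true), /*dictionary=*/ null),
            null);
    Field entries =
        new Field(
            "internal",
            new FieldType(/*nullable=*/ false, new ArrowType.Struct(), /*dictionary=*/ null),
            Arrays.asList(key, value));
    return new Field(
        name,
        new FieldType(/*nullable=*/ false, new ArrowType.Map(/*keysSorted=*/ false), /*dictionary=*/ null),
        Collections.singletonList(entries));
  }
}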
- Consumer keyConsumer = new AvroStringConsumer( - (VarCharVector) structVector.getChildrenFromFields().get(0)); - Consumer valueConsumer = createConsumer(schema.getValueType(), schema.getValueType().getName(), - config, structVector.getChildrenFromFields().get(1)); + Consumer keyConsumer = + new AvroStringConsumer((VarCharVector) structVector.getChildrenFromFields().get(0)); + Consumer valueConsumer = + createConsumer( + schema.getValueType(), + schema.getValueType().getName(), + config, + structVector.getChildrenFromFields().get(1)); AvroStructConsumer internalConsumer = new AvroStructConsumer(structVector, new Consumer[] {keyConsumer, valueConsumer}); @@ -664,11 +688,12 @@ private static Consumer createMapConsumer(Schema schema, String name, AvroToArro return new AvroMapConsumer(mapVector, internalConsumer); } - private static Consumer createUnionConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createUnionConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { final int size = schema.getTypes().size(); - final boolean nullable = schema.getTypes().stream().anyMatch(t -> t.getType() == Type.NULL); + final boolean nullable = + schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.NULL); UnionVector unionVector; if (consumerVector == null) { @@ -695,21 +720,19 @@ private static Consumer createUnionConsumer(Schema schema, String name, AvroToAr /** * Read data from {@link Decoder} and generate a {@link VectorSchemaRoot}. + * * @param schema avro schema * @param decoder avro decoder to read data from */ static VectorSchemaRoot avroToArrowVectors( - Schema schema, - Decoder decoder, - AvroToArrowConfig config) - throws IOException { + Schema schema, Decoder decoder, AvroToArrowConfig config) throws IOException { List vectors = new ArrayList<>(); List consumers = new ArrayList<>(); final Set skipFieldNames = config.getSkipFieldNames(); Schema.Type type = schema.getType(); - if (type == Type.RECORD) { + if (type == Schema.Type.RECORD) { for (Schema.Field field : schema.getFields()) { if (skipFieldNames.contains(field.name())) { consumers.add(createSkipConsumer(field.schema())); @@ -726,8 +749,8 @@ static VectorSchemaRoot avroToArrowVectors( } long validConsumerCount = consumers.stream().filter(c -> !c.skippable()).count(); - Preconditions.checkArgument(vectors.size() == validConsumerCount, - "vectors size not equals consumers size."); + Preconditions.checkArgument( + vectors.size() == validConsumerCount, "vectors size not equals consumers size."); List fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList()); @@ -767,9 +790,7 @@ private static Map getMetaData(Schema schema, Map createExternalProps(Schema schema) { final Map extProps = new HashMap<>(); String doc = schema.getDoc(); @@ -783,8 +804,9 @@ private static Map createExternalProps(Schema schema) { return extProps; } - private static FieldType createFieldType(ArrowType arrowType, Schema schema, Map externalProps) { - return createFieldType(arrowType, schema, externalProps, /*dictionary=*/null); + private static FieldType createFieldType( + ArrowType arrowType, Schema schema, Map externalProps) { + return createFieldType(arrowType, schema, externalProps, /*dictionary=*/ null); } private static FieldType createFieldType( @@ -793,8 +815,8 @@ private static FieldType createFieldType( Map externalProps, DictionaryEncoding dictionary) { - return new FieldType(/*nullable=*/false, arrowType, dictionary, 
- getMetaData(schema, externalProps)); + return new FieldType( + /*nullable=*/ false, arrowType, dictionary, getMetaData(schema, externalProps)); } private static String convertAliases(Set aliases) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java index 4a439ade81181..4123370061794 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import java.io.EOFException; @@ -22,7 +21,6 @@ import java.util.Iterator; import java.util.List; import java.util.stream.Collectors; - import org.apache.arrow.adapter.avro.consumers.CompositeAvroConsumer; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.FieldVector; @@ -32,9 +30,7 @@ import org.apache.avro.Schema; import org.apache.avro.io.Decoder; -/** - * VectorSchemaRoot iterator for partially converting avro data. - */ +/** VectorSchemaRoot iterator for partially converting avro data. */ public class AvroToArrowVectorIterator implements Iterator, AutoCloseable { public static final int NO_LIMIT_BATCH_SIZE = -1; @@ -53,28 +49,18 @@ public class AvroToArrowVectorIterator implements Iterator, Au private final int targetBatchSize; - /** - * Construct an instance. - */ - private AvroToArrowVectorIterator( - Decoder decoder, - Schema schema, - AvroToArrowConfig config) { + /** Construct an instance. */ + private AvroToArrowVectorIterator(Decoder decoder, Schema schema, AvroToArrowConfig config) { this.decoder = decoder; this.schema = schema; this.config = config; this.targetBatchSize = config.getTargetBatchSize(); - } - /** - * Create a ArrowVectorIterator to partially convert data. - */ + /** Create a ArrowVectorIterator to partially convert data. */ public static AvroToArrowVectorIterator create( - Decoder decoder, - Schema schema, - AvroToArrowConfig config) { + Decoder decoder, Schema schema, AvroToArrowConfig config) { AvroToArrowVectorIterator iterator = new AvroToArrowVectorIterator(decoder, schema, config); try { @@ -136,9 +122,10 @@ private void load(VectorSchemaRoot root) { ValueVectorUtility.preAllocate(root, targetBatchSize); } - long validConsumerCount = compositeConsumer.getConsumers().stream().filter(c -> - !c.skippable()).count(); - Preconditions.checkArgument(root.getFieldVectors().size() == validConsumerCount, + long validConsumerCount = + compositeConsumer.getConsumers().stream().filter(c -> !c.skippable()).count(); + Preconditions.checkArgument( + root.getFieldVectors().size() == validConsumerCount, "Schema root vectors size not equals to consumers size."); compositeConsumer.resetConsumerVectors(root); @@ -159,9 +146,8 @@ public boolean hasNext() { return nextBatch != null; } - /** - * Gets the next vector. The user is responsible for freeing its resources. - */ + /** Gets the next vector. The user is responsible for freeing its resources. */ + @Override public VectorSchemaRoot next() { Preconditions.checkArgument(hasNext()); VectorSchemaRoot returned = nextBatch; @@ -174,9 +160,8 @@ public VectorSchemaRoot next() { return returned; } - /** - * Clean up resources. - */ + /** Clean up resources. 
*/ + @Override public void close() { if (nextBatch != null) { nextBatch.close(); diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java index fd25986c32b95..4555ce7a295f7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java @@ -14,25 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.complex.ListVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume array type values from avro decoder. - * Write the data to {@link ListVector}. + * Consumer which consume array type values from avro decoder. Write the data to {@link ListVector}. */ public class AvroArraysConsumer extends BaseAvroConsumer { private final Consumer delegate; - /** - * Instantiate a ArrayConsumer. - */ + /** Instantiate a ArrayConsumer. */ public AvroArraysConsumer(ListVector vector, Consumer delegate) { super(vector); this.delegate = delegate; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java index bf41828d19f7a..09eb5f3b255d5 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.BitVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume boolean type values from avro decoder. - * Write the data to {@link BitVector}. + * Consumer which consume boolean type values from avro decoder. Write the data to {@link + * BitVector}. */ public class AvroBooleanConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroBooleanConsumer. - */ + /** Instantiate a AvroBooleanConsumer. */ public AvroBooleanConsumer(BitVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java index c8370e480608d..86b6cbb13d881 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java @@ -14,26 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; import java.nio.ByteBuffer; - import org.apache.arrow.vector.VarBinaryVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume bytes type values from avro decoder. - * Write the data to {@link VarBinaryVector}. + * Consumer which consume bytes type values from avro decoder. Write the data to {@link + * VarBinaryVector}. 
*/ public class AvroBytesConsumer extends BaseAvroConsumer { private ByteBuffer cacheBuffer; - /** - * Instantiate a AvroBytesConsumer. - */ + /** Instantiate a AvroBytesConsumer. */ public AvroBytesConsumer(VarBinaryVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java index 7cc7dd33b15a9..011cbccc09c5b 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.Float8Vector; import org.apache.avro.io.Decoder; /** - * Consumer which consume double type values from avro decoder. - * Write the data to {@link Float8Vector}. + * Consumer which consume double type values from avro decoder. Write the data to {@link + * Float8Vector}. */ public class AvroDoubleConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroDoubleConsumer. - */ + /** Instantiate a AvroDoubleConsumer. */ public AvroDoubleConsumer(Float8Vector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java index 32a2c85f6fc50..f47988fb962a1 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java @@ -14,24 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.BaseIntVector; import org.apache.arrow.vector.IntVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume enum type values from avro decoder. - * Write the data to {@link IntVector}. + * Consumer which consume enum type values from avro decoder. Write the data to {@link IntVector}. */ public class AvroEnumConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroEnumConsumer. - */ + /** Instantiate a AvroEnumConsumer. */ public AvroEnumConsumer(BaseIntVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java index 16b70898fd36a..6b78afd3c95d4 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java @@ -14,25 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FixedSizeBinaryVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume fixed type values from avro decoder. - * Write the data to {@link org.apache.arrow.vector.FixedSizeBinaryVector}. 
+ * Consumer which consume fixed type values from avro decoder. Write the data to {@link + * org.apache.arrow.vector.FixedSizeBinaryVector}. */ public class AvroFixedConsumer extends BaseAvroConsumer { private final byte[] reuseBytes; - /** - * Instantiate a AvroFixedConsumer. - */ + /** Instantiate a AvroFixedConsumer. */ public AvroFixedConsumer(FixedSizeBinaryVector vector, int size) { super(vector); reuseBytes = new byte[size]; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java index b09d2881875b6..2c6d4aa5a05f6 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.Float4Vector; import org.apache.avro.io.Decoder; /** - * Consumer which consume float type values from avro decoder. - * Write the data to {@link Float4Vector}. + * Consumer which consume float type values from avro decoder. Write the data to {@link + * Float4Vector}. */ public class AvroFloatConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroFloatConsumer. - */ + /** Instantiate a AvroFloatConsumer. */ public AvroFloatConsumer(Float4Vector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java index ae5a2719c5642..22c7b10aa65f7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java @@ -14,23 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.IntVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume int type values from avro decoder. - * Write the data to {@link IntVector}. + * Consumer which consume int type values from avro decoder. Write the data to {@link IntVector}. */ public class AvroIntConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroIntConsumer. - */ + /** Instantiate a AvroIntConsumer. */ public AvroIntConsumer(IntVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java index 4db836acc4586..90c5313417d7c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.BigIntVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume long type values from avro decoder. 
- * Write the data to {@link BigIntVector}. + * Consumer which consume long type values from avro decoder. Write the data to {@link + * BigIntVector}. */ public class AvroLongConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroLongConsumer. - */ + /** Instantiate a AvroLongConsumer. */ public AvroLongConsumer(BigIntVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java index 1ea97e63b61e5..543471533ec01 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java @@ -14,27 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume map type values from avro decoder. - * Write the data to {@link MapVector}. + * Consumer which consume map type values from avro decoder. Write the data to {@link MapVector}. */ public class AvroMapConsumer extends BaseAvroConsumer { private final Consumer delegate; - /** - * Instantiate a AvroMapConsumer. - */ + /** Instantiate a AvroMapConsumer. */ public AvroMapConsumer(MapVector vector, Consumer delegate) { super(vector); this.delegate = delegate; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java index 4c7bb8c03bad3..0f80c2b7b2db3 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java @@ -14,17 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.NullVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume null type values from avro decoder. - * Corresponding to {@link org.apache.arrow.vector.NullVector}. + * Consumer which consume null type values from avro decoder. Corresponding to {@link + * org.apache.arrow.vector.NullVector}. */ public class AvroNullConsumer extends BaseAvroConsumer { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java index 072270aa6c081..164d595e9c6ac 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java @@ -14,26 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; import java.nio.ByteBuffer; - import org.apache.arrow.vector.VarCharVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume string type values from avro decoder. - * Write the data to {@link VarCharVector}. + * Consumer which consume string type values from avro decoder. Write the data to {@link + * VarCharVector}. */ public class AvroStringConsumer extends BaseAvroConsumer { private ByteBuffer cacheBuffer; - /** - * Instantiate a AvroStringConsumer. - */ + /** Instantiate a AvroStringConsumer. */ public AvroStringConsumer(VarCharVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java index a02b1577f9fa8..94c2f611e84b7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume nested record type values from avro decoder. - * Write the data to {@link org.apache.arrow.vector.complex.StructVector}. + * Consumer which consume nested record type values from avro decoder. Write the data to {@link + * org.apache.arrow.vector.complex.StructVector}. */ public class AvroStructConsumer extends BaseAvroConsumer { private final Consumer[] delegates; - /** - * Instantiate a AvroStructConsumer. - */ + /** Instantiate a AvroStructConsumer. */ public AvroStructConsumer(StructVector vector, Consumer[] delegates) { super(vector); this.delegates = delegates; @@ -49,7 +45,6 @@ public void consume(Decoder decoder) throws IOException { } vector.setIndexDefined(currentIndex); currentIndex++; - } @Override diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java index 76287543b0646..5a8e23e62892c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.UnionVector; @@ -26,17 +24,15 @@ import org.apache.avro.io.Decoder; /** - * Consumer which consume unions type values from avro decoder. - * Write the data to {@link org.apache.arrow.vector.complex.UnionVector}. + * Consumer which consume unions type values from avro decoder. Write the data to {@link + * org.apache.arrow.vector.complex.UnionVector}. 
*/ public class AvroUnionsConsumer extends BaseAvroConsumer { private Consumer[] delegates; private Types.MinorType[] types; - /** - * Instantiate an AvroUnionConsumer. - */ + /** Instantiate an AvroUnionConsumer. */ public AvroUnionsConsumer(UnionVector vector, Consumer[] delegates, Types.MinorType[] types) { super(vector); @@ -53,7 +49,8 @@ public void consume(Decoder decoder) throws IOException { vector.setType(currentIndex, types[fieldIndex]); // In UnionVector we need to set sub vector writer position before consume a value - // because in the previous iterations we might not have written to the specific union sub vector. + // because in the previous iterations we might not have written to the specific union sub + // vector. delegate.setPosition(currentIndex); delegate.consume(decoder); diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java index 66a6cda68401e..9430d83cb4372 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import org.apache.arrow.vector.FieldVector; /** * Base class for non-skippable avro consumers. + * * @param vector type. */ public abstract class BaseAvroConsumer implements Consumer { @@ -30,6 +30,7 @@ public abstract class BaseAvroConsumer implements Consume /** * Constructs a base avro consumer. + * * @param vector the vector to consume. */ public BaseAvroConsumer(T vector) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java index 97812226180ac..11c1f7712ef19 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java @@ -14,20 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; import java.util.List; - import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.avro.io.Decoder; -/** - * Composite consumer which hold all consumers. - * It manages the consume and cleanup process. - */ +/** Composite consumer which hold all consumers. It manages the consume and cleanup process. */ public class CompositeAvroConsumer implements AutoCloseable { private final List consumers; @@ -40,18 +35,14 @@ public CompositeAvroConsumer(List consumers) { this.consumers = consumers; } - /** - * Consume decoder data. - */ + /** Consume decoder data. */ public void consume(Decoder decoder) throws IOException { for (Consumer consumer : consumers) { consumer.consume(decoder); } } - /** - * Reset vector of consumers with the given {@link VectorSchemaRoot}. - */ + /** Reset vector of consumers with the given {@link VectorSchemaRoot}. 
*/ public void resetConsumerVectors(VectorSchemaRoot root) { int index = 0; for (Consumer consumer : consumers) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java index c2ae1ce77b282..0c07f90bf5f39 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java @@ -14,58 +14,49 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FieldVector; import org.apache.avro.io.Decoder; /** * Interface that is used to consume values from avro decoder. + * * @param The vector within consumer or its delegate, used for partially consume purpose. */ public interface Consumer extends AutoCloseable { /** * Consume a specific type value from avro decoder and write it to vector. + * * @param decoder avro decoder to read data * @throws IOException on error */ void consume(Decoder decoder) throws IOException; - /** - * Add null value to vector by making writer position + 1. - */ + /** Add null value to vector by making writer position + 1. */ void addNull(); - /** - * Set the position to write value into vector. - */ + /** Set the position to write value into vector. */ void setPosition(int index); - /** - * Get the vector within the consumer. - */ + /** Get the vector within the consumer. */ FieldVector getVector(); - /** - * Close this consumer when occurs exception to avoid potential leak. - */ + /** Close this consumer when occurs exception to avoid potential leak. */ + @Override void close() throws Exception; /** * Reset the vector within consumer for partial read purpose. + * * @return true if reset is successful, false if reset is not needed. */ boolean resetValueVector(T vector); - /** - * Indicates whether the consumer is type of {@link SkipConsumer}. - */ + /** Indicates whether the consumer is type of {@link SkipConsumer}. */ default boolean skippable() { return false; } - } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java index 1ac0a6d71557b..2c104728ce620 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FieldVector; import org.apache.avro.io.Decoder; -/** - * Consumer which skip (throw away) data from the decoder. - */ +/** Consumer which skip (throw away) data from the decoder. 
*/ public class SkipConsumer implements Consumer { private final SkipFunction skipFunction; @@ -39,12 +35,10 @@ public void consume(Decoder decoder) throws IOException { } @Override - public void addNull() { - } + public void addNull() {} @Override - public void setPosition(int index) { - } + public void setPosition(int index) {} @Override public FieldVector getVector() { @@ -52,8 +46,7 @@ public FieldVector getVector() { } @Override - public void close() throws Exception { - } + public void close() throws Exception {} @Override public boolean resetValueVector(FieldVector vector) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java index 93fc4a7fede3f..3d72d03104f3c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java @@ -14,16 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.avro.io.Decoder; -/** - * Adapter function to skip (throw away) data from the decoder. - */ +/** Adapter function to skip (throw away) data from the decoder. */ @FunctionalInterface public interface SkipFunction { void apply(Decoder decoder) throws IOException; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java index a5c36d88fb76a..0f557297a3cb7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.DateDayVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date type values from avro decoder. - * Write the data to {@link DateDayVector}. + * Consumer which consume date type values from avro decoder. Write the data to {@link + * DateDayVector}. */ public class AvroDateConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroDateConsumer. - */ + /** Instantiate a AvroDateConsumer. */ public AvroDateConsumer(DateDayVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java index ebe5ca3884e5e..fa1a12ac8a6ed 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java @@ -14,40 +14,32 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; import java.nio.ByteBuffer; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.DecimalVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume decimal type values from avro decoder. - * Write the data to {@link DecimalVector}. + * Consumer which consume decimal type values from avro decoder. Write the data to {@link + * DecimalVector}. */ public abstract class AvroDecimalConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroDecimalConsumer. - */ + /** Instantiate a AvroDecimalConsumer. */ public AvroDecimalConsumer(DecimalVector vector) { super(vector); } - /** - * Consumer for decimal logical type with original bytes type. - */ + /** Consumer for decimal logical type with original bytes type. */ public static class BytesDecimalConsumer extends AvroDecimalConsumer { private ByteBuffer cacheBuffer; - /** - * Instantiate a BytesDecimalConsumer. - */ + /** Instantiate a BytesDecimalConsumer. */ public BytesDecimalConsumer(DecimalVector vector) { super(vector); } @@ -60,19 +52,14 @@ public void consume(Decoder decoder) throws IOException { cacheBuffer.get(bytes); vector.setBigEndian(currentIndex++, bytes); } - } - /** - * Consumer for decimal logical type with original fixed type. - */ + /** Consumer for decimal logical type with original fixed type. */ public static class FixedDecimalConsumer extends AvroDecimalConsumer { private byte[] reuseBytes; - /** - * Instantiate a FixedDecimalConsumer. - */ + /** Instantiate a FixedDecimalConsumer. */ public FixedDecimalConsumer(DecimalVector vector, int size) { super(vector); Preconditions.checkArgument(size <= 16, "Decimal bytes length should <= 16."); diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java index 89216d4ad1436..60e7d15bf16d6 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeMicroVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date time-micro values from avro decoder. - * Write the data to {@link TimeMicroVector}. + * Consumer which consume date time-micro values from avro decoder. Write the data to {@link + * TimeMicroVector}. */ public class AvroTimeMicroConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimeMicroConsumer. - */ + /** Instantiate a AvroTimeMicroConsumer. 
*/ public AvroTimeMicroConsumer(TimeMicroVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java index ab5df8d4bc8ac..e0b232e9abd5e 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeMilliVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date time-millis values from avro decoder. - * Write the data to {@link TimeMilliVector}. + * Consumer which consume date time-millis values from avro decoder. Write the data to {@link + * TimeMilliVector}. */ public class AvroTimeMillisConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimeMilliConsumer. - */ + /** Instantiate a AvroTimeMilliConsumer. */ public AvroTimeMillisConsumer(TimeMilliVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java index 93b39d479ff0e..88acf7b329569 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeStampMicroVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date timestamp-micro values from avro decoder. - * Write the data to {@link TimeStampMicroVector}. + * Consumer which consume date timestamp-micro values from avro decoder. Write the data to {@link + * TimeStampMicroVector}. */ public class AvroTimestampMicrosConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimestampMicroConsumer. - */ + /** Instantiate a AvroTimestampMicroConsumer. */ public AvroTimestampMicrosConsumer(TimeStampMicroVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java index 9e651c3959f81..ec50d7902319c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
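[Editor's note] The logical-type consumers in these hunks differ only in their vector class; their consume bodies are untouched by this diff. For orientation, a hedged sketch of the usual pattern for time-micros, written from the public TimeMicroVector and Decoder APIs rather than copied from the adapter:

```java
// Sketch of the shape of a logical-type consumer's consume() method; this
// body is illustrative, not taken from the source being diffed.
import java.io.IOException;
import org.apache.arrow.vector.TimeMicroVector;
import org.apache.avro.io.Decoder;

class TimeMicroConsumeSketch {
  private final TimeMicroVector vector;
  private int currentIndex;

  TimeMicroConsumeSketch(TimeMicroVector vector) {
    this.vector = vector;
  }

  void consume(Decoder decoder) throws IOException {
    // Avro encodes time-micros as a long; write it straight into the vector.
    vector.set(currentIndex++, decoder.readLong());
  }
}
```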
*/ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeStampMilliVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date timestamp-millis values from avro decoder. - * Write the data to {@link TimeStampMilliVector}. + * Consumer which consume date timestamp-millis values from avro decoder. Write the data to {@link + * TimeStampMilliVector}. */ public class AvroTimestampMillisConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimestampMillisConsumer. - */ + /** Instantiate a AvroTimestampMillisConsumer. */ public AvroTimestampMillisConsumer(TimeStampMilliVector vector) { super(vector); } diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java index 6ee04e33a5ce1..d8eefc715f611 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import static junit.framework.TestCase.assertNull; @@ -27,7 +26,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.util.DateUtility; @@ -43,13 +41,13 @@ public void testTimestampMicros() throws Exception { Schema schema = getSchema("logical/test_timestamp_micros.avsc"); List data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); - List expected = Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMicro(10000), - DateUtility.getLocalDateTimeFromEpochMicro(20000), - DateUtility.getLocalDateTimeFromEpochMicro(30000), - DateUtility.getLocalDateTimeFromEpochMicro(40000), - DateUtility.getLocalDateTimeFromEpochMicro(50000) - ); + List expected = + Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMicro(10000), + DateUtility.getLocalDateTimeFromEpochMicro(20000), + DateUtility.getLocalDateTimeFromEpochMicro(30000), + DateUtility.getLocalDateTimeFromEpochMicro(40000), + DateUtility.getLocalDateTimeFromEpochMicro(50000)); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -62,13 +60,13 @@ public void testTimestampMillis() throws Exception { Schema schema = getSchema("logical/test_timestamp_millis.avsc"); List data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); - List expected = Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMilli(10000), - DateUtility.getLocalDateTimeFromEpochMilli(20000), - DateUtility.getLocalDateTimeFromEpochMilli(30000), - DateUtility.getLocalDateTimeFromEpochMilli(40000), - DateUtility.getLocalDateTimeFromEpochMilli(50000) - ); + List expected = + Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMilli(10000), + DateUtility.getLocalDateTimeFromEpochMilli(20000), + DateUtility.getLocalDateTimeFromEpochMilli(30000), + DateUtility.getLocalDateTimeFromEpochMilli(40000), + DateUtility.getLocalDateTimeFromEpochMilli(50000)); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -93,13 +91,13 @@ public void testTimeMillis() throws Exception { Schema schema = 
getSchema("logical/test_time_millis.avsc"); List data = Arrays.asList(100, 200, 300, 400, 500); - List expected = Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMilli(100), - DateUtility.getLocalDateTimeFromEpochMilli(200), - DateUtility.getLocalDateTimeFromEpochMilli(300), - DateUtility.getLocalDateTimeFromEpochMilli(400), - DateUtility.getLocalDateTimeFromEpochMilli(500) - ); + List expected = + Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMilli(100), + DateUtility.getLocalDateTimeFromEpochMilli(200), + DateUtility.getLocalDateTimeFromEpochMilli(300), + DateUtility.getLocalDateTimeFromEpochMilli(400), + DateUtility.getLocalDateTimeFromEpochMilli(500)); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -137,7 +135,6 @@ public void testDecimalWithOriginalBytes() throws Exception { VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); checkPrimitiveResult(expected, vector); - } @Test @@ -174,10 +171,9 @@ public void testInvalidDecimalPrecision() throws Exception { data.add(buffer); } - IllegalArgumentException e = assertThrows(IllegalArgumentException.class, - () -> writeAndRead(schema, data)); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, () -> writeAndRead(schema, data)); assertTrue(e.getMessage().contains("Precision must be in range of 1 to 38")); - } @Test @@ -197,5 +193,4 @@ public void testFailedToCreateDecimalLogicalType() throws Exception { Schema schema3 = getSchema("logical/test_decimal_invalid4.avsc"); assertNull(schema3.getLogicalType()); } - } diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java index a37eca6514e04..3335ee5a8f6dc 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java @@ -14,18 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Set; - import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.types.Types; @@ -40,7 +39,10 @@ public class AvroSkipFieldTest extends AvroTestBase { public void testSkipUnionWithOneField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_union_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_union_one_field_expected.avsc"); @@ -69,7 +71,10 @@ public void testSkipUnionWithOneField() throws Exception { public void testSkipUnionWithNullableOneField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_union_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_union_nullable_field_expected.avsc"); @@ -98,7 +103,10 @@ public void testSkipUnionWithNullableOneField() throws Exception { public void testSkipUnionWithMultiFields() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_union_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_union_multi_fields_expected.avsc"); @@ -127,7 +135,10 @@ public void testSkipUnionWithMultiFields() throws Exception { public void testSkipMapField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_map_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_map_expected.avsc"); @@ -159,7 +170,10 @@ public void testSkipMapField() throws Exception { public void testSkipArrayField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_array_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_array_expected.avsc"); @@ -188,7 +202,10 @@ public void testSkipMultiFields() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new 
AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("test_record.avsc"); Schema expectedSchema = getSchema("skip/test_skip_multi_fields_expected.avsc"); @@ -215,7 +232,10 @@ public void testSkipMultiFields() throws Exception { public void testSkipStringField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_string_expected.avsc"); @@ -223,12 +243,13 @@ public void testSkipStringField() throws Exception { ArrayList expectedData = new ArrayList<>(); for (int i = 0; i < 5; i++) { - final byte[] testBytes = ("test" + i).getBytes(); + final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); GenericRecord record = new GenericData.Record(schema); GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -249,7 +270,10 @@ public void testSkipStringField() throws Exception { public void testSkipBytesField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f3"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_bytes_expected.avsc"); @@ -257,12 +281,13 @@ public void testSkipBytesField() throws Exception { ArrayList expectedData = new ArrayList<>(); for (int i = 0; i < 5; i++) { - final byte[] testBytes = ("test" + i).getBytes(); + final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); GenericRecord record = new GenericData.Record(schema); GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -283,7 +308,10 @@ public void testSkipBytesField() throws Exception { public void testSkipFixedField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc"); @@ -291,12 +319,13 @@ public void testSkipFixedField() throws Exception { ArrayList expectedData = new ArrayList<>(); for (int i = 0; i < 5; i++) { - final 
byte[] testBytes = ("test" + i).getBytes(); + final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); GenericRecord record = new GenericData.Record(schema); GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -317,7 +346,10 @@ public void testSkipFixedField() throws Exception { public void testSkipEnumField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc"); @@ -325,12 +357,13 @@ public void testSkipEnumField() throws Exception { ArrayList expectedData = new ArrayList<>(); for (int i = 0; i < 5; i++) { - final byte[] testBytes = ("test" + i).getBytes(); + final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); GenericRecord record = new GenericData.Record(schema); GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -351,7 +384,10 @@ public void testSkipEnumField() throws Exception { public void testSkipBooleanField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_boolean_expected.avsc"); @@ -384,7 +420,10 @@ public void testSkipBooleanField() throws Exception { public void testSkipIntField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_int_expected.avsc"); @@ -417,7 +456,10 @@ public void testSkipIntField() throws Exception { public void testSkipLongField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_long_expected.avsc"); @@ -450,7 
+492,10 @@ public void testSkipLongField() throws Exception { public void testSkipFloatField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f3"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_float_expected.avsc"); @@ -483,7 +528,10 @@ public void testSkipFloatField() throws Exception { public void testSkipDoubleField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f4"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_double_expected.avsc"); @@ -516,7 +564,10 @@ public void testSkipDoubleField() throws Exception { public void testSkipRecordField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_record_before.avsc"); Schema nestedSchema = schema.getFields().get(0).schema(); ArrayList data = new ArrayList<>(); @@ -546,7 +597,10 @@ public void testSkipRecordField() throws Exception { public void testSkipNestedFields() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0.f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("test_nested_record.avsc"); Schema nestedSchema = schema.getFields().get(0).schema(); ArrayList data = new ArrayList<>(); @@ -602,21 +656,26 @@ public void testSkipThirdLevelField() throws Exception { assertEquals(Types.MinorType.STRUCT, root1.getFieldVectors().get(0).getMinorType()); StructVector secondLevelVector = (StructVector) root1.getFieldVectors().get(0); assertEquals(1, secondLevelVector.getChildrenFromFields().size()); - assertEquals(Types.MinorType.STRUCT, secondLevelVector.getChildrenFromFields().get(0).getMinorType()); + assertEquals( + Types.MinorType.STRUCT, secondLevelVector.getChildrenFromFields().get(0).getMinorType()); StructVector thirdLevelVector = (StructVector) secondLevelVector.getChildrenFromFields().get(0); assertEquals(3, thirdLevelVector.getChildrenFromFields().size()); // skip third level field and validate Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0.f0.f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); VectorSchemaRoot root2 = writeAndRead(firstLevelSchema, data); assertEquals(1, root2.getFieldVectors().size()); assertEquals(Types.MinorType.STRUCT, root2.getFieldVectors().get(0).getMinorType()); StructVector secondStruct = (StructVector) root2.getFieldVectors().get(0); assertEquals(1, secondStruct.getChildrenFromFields().size()); - 
assertEquals(Types.MinorType.STRUCT, secondStruct.getChildrenFromFields().get(0).getMinorType()); + assertEquals( + Types.MinorType.STRUCT, secondStruct.getChildrenFromFields().get(0).getMinorType()); StructVector thirdStruct = (StructVector) secondStruct.getChildrenFromFields().get(0); assertEquals(2, thirdStruct.getChildrenFromFields().size()); diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java index 60a3a285db3aa..534c2cc18c572 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -29,7 +28,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.FieldVector; @@ -51,8 +49,7 @@ public class AvroTestBase { - @ClassRule - public static final TemporaryFolder TMP = new TemporaryFolder(); + @ClassRule public static final TemporaryFolder TMP = new TemporaryFolder(); protected AvroToArrowConfig config; @@ -64,18 +61,21 @@ public void init() { public static Schema getSchema(String schemaName) throws Exception { try { - // Attempt to use JDK 9 behavior of getting the module then the resource stream from the module. + // Attempt to use JDK 9 behavior of getting the module then the resource stream from the + // module. // Note that this code is caller-sensitive. Method getModuleMethod = Class.class.getMethod("getModule"); Object module = getModuleMethod.invoke(TestWriteReadAvroRecord.class); - Method getResourceAsStreamFromModule = module.getClass().getMethod("getResourceAsStream", String.class); - try (InputStream is = (InputStream) getResourceAsStreamFromModule.invoke(module, "/schema/" + schemaName)) { - return new Schema.Parser() - .parse(is); + Method getResourceAsStreamFromModule = + module.getClass().getMethod("getResourceAsStream", String.class); + try (InputStream is = + (InputStream) getResourceAsStreamFromModule.invoke(module, "/schema/" + schemaName)) { + return new Schema.Parser().parse(is); } } catch (NoSuchMethodException ex) { // Use JDK8 behavior. 
- try (InputStream is = TestWriteReadAvroRecord.class.getResourceAsStream("/schema/" + schemaName)) { + try (InputStream is = + TestWriteReadAvroRecord.class.getResourceAsStream("/schema/" + schemaName)) { return new Schema.Parser().parse(is); } } @@ -84,11 +84,11 @@ public static Schema getSchema(String schemaName) throws Exception { protected VectorSchemaRoot writeAndRead(Schema schema, List data) throws Exception { File dataFile = TMP.newFile(); - BinaryEncoder - encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); + BinaryEncoder encoder = + new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); DatumWriter writer = new GenericDatumWriter(schema); - BinaryDecoder - decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); + BinaryDecoder decoder = + new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); for (Object value : data) { writer.write(value, encoder); @@ -145,7 +145,7 @@ protected void checkPrimitiveResult(List data, FieldVector vector) { } } - protected void checkRecordResult(Schema schema, ArrayList data, VectorSchemaRoot root) { + protected void checkRecordResult(Schema schema, List data, VectorSchemaRoot root) { assertEquals(data.size(), root.getRowCount()); assertEquals(schema.getFields().size(), root.getFieldVectors().size()); @@ -157,10 +157,10 @@ protected void checkRecordResult(Schema schema, ArrayList data, V checkPrimitiveResult(fieldData, root.getFieldVectors().get(i)); } - } - protected void checkNestedRecordResult(Schema schema, List data, VectorSchemaRoot root) { + protected void checkNestedRecordResult( + Schema schema, List data, VectorSchemaRoot root) { assertEquals(data.size(), root.getRowCount()); assertTrue(schema.getFields().size() == 1); @@ -176,10 +176,8 @@ protected void checkNestedRecordResult(Schema schema, List data, checkPrimitiveResult(fieldData, structVector.getChildrenFromFields().get(i)); } - } - // belows are for iterator api protected void checkArrayResult(List> expected, List vectors) { @@ -194,10 +192,12 @@ protected void checkArrayResult(List> expected, List vectors } } - protected void checkRecordResult(Schema schema, ArrayList data, List roots) { - roots.forEach(root -> { - assertEquals(schema.getFields().size(), root.getFieldVectors().size()); - }); + protected void checkRecordResult( + Schema schema, List data, List roots) { + roots.forEach( + root -> { + assertEquals(schema.getFields().size(), root.getFieldVectors().size()); + }); for (int i = 0; i < schema.getFields().size(); i++) { List fieldData = new ArrayList(); @@ -210,7 +210,6 @@ protected void checkRecordResult(Schema schema, ArrayList data, L checkPrimitiveResult(fieldData, vectors); } - } protected void checkPrimitiveResult(List data, List vectors) { diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java index 02f7a3733734c..7e73b2d6c7038 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
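[Editor's note] AvroTestBase.writeAndRead round-trips datums through a direct binary encoder and the adapter's iterator API. A hedged, self-contained variant using in-memory streams instead of the temp-folder files (RoundTripSketch and its method name are illustrative; the AvroToArrow calls match those shown in these hunks):

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.List;
import org.apache.arrow.adapter.avro.AvroToArrow;
import org.apache.arrow.adapter.avro.AvroToArrowConfigBuilder;
import org.apache.arrow.adapter.avro.AvroToArrowVectorIterator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

class RoundTripSketch {
  static void roundTrip(Schema schema, List<Object> data) throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = new EncoderFactory().directBinaryEncoder(out, null);
    GenericDatumWriter<Object> writer = new GenericDatumWriter<>(schema);
    for (Object value : data) {
      writer.write(value, encoder);
    }
    encoder.flush();

    Decoder decoder =
        new DecoderFactory()
            .directBinaryDecoder(new ByteArrayInputStream(out.toByteArray()), null);
    try (BufferAllocator allocator = new RootAllocator();
        AvroToArrowVectorIterator iterator =
            AvroToArrow.avroToArrowIterator(
                schema, decoder, new AvroToArrowConfigBuilder(allocator).build())) {
      while (iterator.hasNext()) {
        // Default config creates a fresh root per batch, so the caller closes it.
        try (VectorSchemaRoot root = iterator.next()) {
          System.out.println("batch rows: " + root.getRowCount());
        }
      }
    }
  }
}
```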
*/ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -28,7 +27,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.AutoCloseables; @@ -59,11 +57,11 @@ public void init() { private AvroToArrowVectorIterator convert(Schema schema, List data) throws Exception { File dataFile = TMP.newFile(); - BinaryEncoder - encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); + BinaryEncoder encoder = + new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); DatumWriter writer = new GenericDatumWriter(schema); - BinaryDecoder - decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); + BinaryDecoder decoder = + new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); for (Object value : data) { writer.write(value, encoder); @@ -107,7 +105,7 @@ public void testNullableStringType() throws Exception { List roots = new ArrayList<>(); List vectors = new ArrayList<>(); - try (AvroToArrowVectorIterator iterator = convert(schema, data);) { + try (AvroToArrowVectorIterator iterator = convert(schema, data); ) { while (iterator.hasNext()) { VectorSchemaRoot root = iterator.next(); FieldVector vector = root.getFieldVectors().get(0); @@ -117,7 +115,6 @@ public void testNullableStringType() throws Exception { } checkPrimitiveResult(expected, vectors); AutoCloseables.close(roots); - } @Test @@ -140,18 +137,18 @@ public void testRecordType() throws Exception { } checkRecordResult(schema, data, roots); AutoCloseables.close(roots); - } @Test public void testArrayType() throws Exception { Schema schema = getSchema("test_array.avsc"); - List> data = Arrays.asList( - Arrays.asList("11", "222", "999"), - Arrays.asList("12222", "2333", "1000"), - Arrays.asList("1rrr", "2ggg"), - Arrays.asList("1vvv", "2bbb"), - Arrays.asList("1fff", "2")); + List> data = + Arrays.asList( + Arrays.asList("11", "222", "999"), + Arrays.asList("12222", "2333", "1000"), + Arrays.asList("1rrr", "2ggg"), + Arrays.asList("1vvv", "2bbb"), + Arrays.asList("1fff", "2")); List roots = new ArrayList<>(); List vectors = new ArrayList<>(); @@ -172,8 +169,9 @@ public void runLargeNumberOfRows() throws Exception { int x = 0; final int targetRows = 600000; Decoder fakeDecoder = new FakeDecoder(targetRows); - try (AvroToArrowVectorIterator iter = AvroToArrow.avroToArrowIterator(schema, fakeDecoder, - new AvroToArrowConfigBuilder(config.getAllocator()).build())) { + try (AvroToArrowVectorIterator iter = + AvroToArrow.avroToArrowIterator( + schema, fakeDecoder, new AvroToArrowConfigBuilder(config.getAllocator()).build())) { while (iter.hasNext()) { VectorSchemaRoot root = iter.next(); x += root.getRowCount(); @@ -181,13 +179,11 @@ public void runLargeNumberOfRows() throws Exception { } } - assertEquals(x, targetRows); + assertEquals(targetRows, x); } - /** - * Fake avro decoder to test large data. - */ - private class FakeDecoder extends Decoder { + /** Fake avro decoder to test large data. 
*/ + private static class FakeDecoder extends Decoder { private int numRows; @@ -204,8 +200,7 @@ private void validate() throws EOFException { } @Override - public void readNull() throws IOException { - } + public void readNull() throws IOException {} @Override public boolean readBoolean() throws IOException { @@ -243,9 +238,7 @@ public String readString() throws IOException { } @Override - public void skipString() throws IOException { - - } + public void skipString() throws IOException {} @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { @@ -253,9 +246,7 @@ public ByteBuffer readBytes(ByteBuffer old) throws IOException { } @Override - public void skipBytes() throws IOException { - - } + public void skipBytes() throws IOException {} @Override public void readFixed(byte[] bytes, int start, int length) throws IOException { @@ -264,9 +255,7 @@ public void readFixed(byte[] bytes, int start, int length) throws IOException { } @Override - public void skipFixed(int length) throws IOException { - - } + public void skipFixed(int length) throws IOException {} @Override public int readEnum() throws IOException { diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java index 1c64204191762..59317c3be033f 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -26,7 +25,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; @@ -87,10 +85,8 @@ public void testFixedAttributes() throws Exception { Schema schema = getSchema("attrs/test_fixed_attr.avsc"); List data = new ArrayList<>(); - List expected = new ArrayList<>(); for (int i = 0; i < 5; i++) { byte[] value = ("value" + i).getBytes(StandardCharsets.UTF_8); - expected.add(value); GenericData.Fixed fixed = new GenericData.Fixed(schema); fixed.bytes(value); data.add(fixed); @@ -107,12 +103,13 @@ public void testFixedAttributes() throws Exception { @Test public void testEnumAttributes() throws Exception { Schema schema = getSchema("attrs/test_enum_attrs.avsc"); - List data = Arrays.asList( - new GenericData.EnumSymbol(schema, "SPADES"), - new GenericData.EnumSymbol(schema, "HEARTS"), - new GenericData.EnumSymbol(schema, "DIAMONDS"), - new GenericData.EnumSymbol(schema, "CLUBS"), - new GenericData.EnumSymbol(schema, "SPADES")); + List data = + Arrays.asList( + new GenericData.EnumSymbol(schema, "SPADES"), + new GenericData.EnumSymbol(schema, "HEARTS"), + new GenericData.EnumSymbol(schema, "DIAMONDS"), + new GenericData.EnumSymbol(schema, "CLUBS"), + new GenericData.EnumSymbol(schema, "SPADES")); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -174,12 +171,13 @@ public void testNestedRecordType() throws Exception { @Test public void testEnumType() throws Exception { Schema schema = getSchema("test_primitive_enum.avsc"); - List data = Arrays.asList( - new GenericData.EnumSymbol(schema, "SPADES"), - new GenericData.EnumSymbol(schema, "HEARTS"), - new 
GenericData.EnumSymbol(schema, "DIAMONDS"), - new GenericData.EnumSymbol(schema, "CLUBS"), - new GenericData.EnumSymbol(schema, "SPADES")); + List data = + Arrays.asList( + new GenericData.EnumSymbol(schema, "SPADES"), + new GenericData.EnumSymbol(schema, "HEARTS"), + new GenericData.EnumSymbol(schema, "DIAMONDS"), + new GenericData.EnumSymbol(schema, "CLUBS"), + new GenericData.EnumSymbol(schema, "SPADES")); List expectedIndices = Arrays.asList(0, 1, 2, 3, 0); @@ -304,12 +302,13 @@ public void testNullableDoubleType() throws Exception { @Test public void testBytesType() throws Exception { Schema schema = getSchema("test_primitive_bytes.avsc"); - List data = Arrays.asList( - ByteBuffer.wrap("value1".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value2".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value3".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value4".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value5".getBytes(StandardCharsets.UTF_8))); + List data = + Arrays.asList( + ByteBuffer.wrap("value1".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value2".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value3".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value4".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value5".getBytes(StandardCharsets.UTF_8))); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -324,7 +323,8 @@ public void testNullableBytesType() throws Exception { ArrayList data = new ArrayList<>(); for (int i = 0; i < 5; i++) { GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0 ? ByteBuffer.wrap(("test" + i).getBytes(StandardCharsets.UTF_8)) : null); + record.put( + 0, i % 2 == 0 ? ByteBuffer.wrap(("test" + i).getBytes(StandardCharsets.UTF_8)) : null); data.add(record); } @@ -361,12 +361,13 @@ public void testNullableBooleanType() throws Exception { @Test public void testArrayType() throws Exception { Schema schema = getSchema("test_array.avsc"); - List> data = Arrays.asList( - Arrays.asList("11", "222", "999"), - Arrays.asList("12222", "2333", "1000"), - Arrays.asList("1rrr", "2ggg"), - Arrays.asList("1vvv", "2bbb"), - Arrays.asList("1fff", "2")); + List> data = + Arrays.asList( + Arrays.asList("11", "222", "999"), + Arrays.asList("12222", "2333", "1000"), + Arrays.asList("1rrr", "2ggg"), + Arrays.asList("1vvv", "2bbb"), + Arrays.asList("1fff", "2")); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -473,5 +474,4 @@ public void testNullableUnionType() throws Exception { checkPrimitiveResult(expected, vector); } - } diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java index afbddaa6ed87a..a721a1e4cc6a8 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.io.File; import java.util.ArrayList; import java.util.List; - import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileWriter; @@ -36,11 +34,9 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; - public class TestWriteReadAvroRecord { - @ClassRule - public static final TemporaryFolder TMP = new TemporaryFolder(); + @ClassRule public static final TemporaryFolder TMP = new TemporaryFolder(); @Test public void testWriteAndRead() throws Exception { @@ -48,7 +44,7 @@ public void testWriteAndRead() throws Exception { File dataFile = TMP.newFile(); Schema schema = AvroTestBase.getSchema("test.avsc"); - //write data to disk + // write data to disk GenericRecord user1 = new GenericData.Record(schema); user1.put("name", "Alyssa"); user1.put("favorite_number", 256); @@ -65,10 +61,10 @@ public void testWriteAndRead() throws Exception { dataFileWriter.append(user2); dataFileWriter.close(); - //read data from disk + // read data from disk DatumReader datumReader = new GenericDatumReader(schema); - DataFileReader - dataFileReader = new DataFileReader(dataFile, datumReader); + DataFileReader dataFileReader = + new DataFileReader(dataFile, datumReader); List result = new ArrayList<>(); while (dataFileReader.hasNext()) { GenericRecord user = dataFileReader.next(); @@ -86,5 +82,4 @@ public void testWriteAndRead() throws Exception { assertEquals(7, deUser2.get("favorite_number")); assertEquals("red", deUser2.get("favorite_color").toString()); } - } diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 5f72729bb76e7..b444eff56277d 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -24,22 +24,24 @@ (Contrib/Experimental)A library for converting JDBC data to Arrow data. http://maven.apache.org + + dev/checkstyle/checkstyle-spotless.xml + none + + - org.apache.arrow arrow-memory-core - org.apache.arrow arrow-memory-netty runtime - org.apache.arrow arrow-vector @@ -48,10 +50,9 @@ org.immutables - value + value-annotations - com.h2database h2 @@ -94,9 +95,6 @@ jdk11+ [11,] - - !m2e.version - diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java index 427c766982f30..d30cf32a04996 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.isColumnNullable; @@ -23,7 +22,6 @@ import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Iterator; - import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer; import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException; @@ -35,9 +33,7 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ValueVectorUtility; -/** - * VectorSchemaRoot iterator for partially converting JDBC data. - */ +/** VectorSchemaRoot iterator for partially converting JDBC data. 
*/ public class ArrowVectorIterator implements Iterator, AutoCloseable { private final ResultSet resultSet; @@ -54,13 +50,12 @@ public class ArrowVectorIterator implements Iterator, AutoClos private final int targetBatchSize; - // This is used to track whether the ResultSet has been fully read, and is needed specifically for cases where there + // This is used to track whether the ResultSet has been fully read, and is needed specifically for + // cases where there // is a ResultSet having zero rows (empty): private boolean readComplete = false; - /** - * Construct an instance. - */ + /** Construct an instance. */ private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException { this.resultSet = resultSet; this.config = config; @@ -73,12 +68,8 @@ private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throw this.nextBatch = config.isReuseVectorSchemaRoot() ? createVectorSchemaRoot() : null; } - /** - * Create a ArrowVectorIterator to partially convert data. - */ - public static ArrowVectorIterator create( - ResultSet resultSet, - JdbcToArrowConfig config) + /** Create a ArrowVectorIterator to partially convert data. */ + public static ArrowVectorIterator create(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException { ArrowVectorIterator iterator = null; try { @@ -142,10 +133,18 @@ private VectorSchemaRoot createVectorSchemaRoot() throws SQLException { private void initialize(VectorSchemaRoot root) throws SQLException { for (int i = 1; i <= consumers.length; i++) { - final JdbcFieldInfo columnFieldInfo = JdbcToArrowUtils.getJdbcFieldInfoForColumn(rsmd, i, config); + final JdbcFieldInfo columnFieldInfo = + JdbcToArrowUtils.getJdbcFieldInfoForColumn(rsmd, i, config); ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); - consumers[i - 1] = config.getJdbcConsumerGetter().apply( - arrowType, i, isColumnNullable(resultSet.getMetaData(), i, columnFieldInfo), root.getVector(i - 1), config); + consumers[i - 1] = + config + .getJdbcConsumerGetter() + .apply( + arrowType, + i, + isColumnNullable(resultSet.getMetaData(), i, columnFieldInfo), + root.getVector(i - 1), + config); } } @@ -170,16 +169,17 @@ public boolean hasNext() { } /** - * Gets the next vector. - * If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, - * the client is responsible for freeing its resources. + * Gets the next vector. If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, the + * client is responsible for freeing its resources. + * * @throws JdbcConsumerException on error from VectorConsumer */ @Override public VectorSchemaRoot next() { Preconditions.checkArgument(hasNext()); try { - VectorSchemaRoot ret = config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot(); + VectorSchemaRoot ret = + config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot(); load(ret); return ret; } catch (Exception e) { @@ -193,8 +193,9 @@ public VectorSchemaRoot next() { } /** - * Clean up resources ONLY WHEN THE {@link VectorSchemaRoot} HOLDING EACH BATCH IS REUSED. If a new VectorSchemaRoot - * is created for each batch, each root must be closed manually by the client code. + * Clean up resources ONLY WHEN THE {@link VectorSchemaRoot} HOLDING EACH BATCH IS REUSED. If a + * new VectorSchemaRoot is created for each batch, each root must be closed manually by the client + * code. 
*/ @Override public void close() { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java index f95133fc7e44c..30e734a68d511 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java @@ -14,20 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; -/** - * String constants used for metadata returned on Vectors. - */ +/** String constants used for metadata returned on Vectors. */ public class Constants { - private Constants() { - } + private Constants() {} public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME"; public static final String SQL_SCHEMA_NAME_KEY = "SQL_SCHEMA_NAME"; public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME"; public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME"; public static final String SQL_TYPE_KEY = "SQL_TYPE"; - } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java index d16964ea14417..6becac0bbc10c 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java @@ -14,25 +14,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.util.Preconditions; /** - * This class represents the information about a JDBC ResultSet Field that is - * needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}. - * Currently, this is: + * This class represents the information about a JDBC ResultSet Field that is needed to construct an + * {@link org.apache.arrow.vector.types.pojo.ArrowType}. Currently, this is: + * *
 * <ul>
- *   <li>The JDBC {@link java.sql.Types} type.</li>
- *   <li>The nullability.</li>
- *   <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types).</li>
- *   <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types).</li>
+ *   <li>The JDBC {@link java.sql.Types} type.
+ *   <li>The nullability.
+ *   <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link
+ *       java.sql.Types#NUMERIC} types).
+ *   <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link
+ *       java.sql.Types#NUMERIC} types).
  * </ul>
*/ public class JdbcFieldInfo { @@ -45,12 +45,13 @@ public class JdbcFieldInfo { private final int displaySize; /** - * Builds a JdbcFieldInfo using only the {@link java.sql.Types} type. Do not use this constructor - * if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and - * scale will be set to 0. + * Builds a JdbcFieldInfo using only the {@link java.sql.Types} type. Do not use this + * constructor if the field type is {@link java.sql.Types#DECIMAL} or {@link + * java.sql.Types#NUMERIC}; the precision and scale will be set to 0. * * @param jdbcType The {@link java.sql.Types} type. - * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}. + * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link + * java.sql.Types#NUMERIC}. */ public JdbcFieldInfo(int jdbcType) { Preconditions.checkArgument( @@ -67,7 +68,8 @@ public JdbcFieldInfo(int jdbcType) { /** * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, precision, and scale. - * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types. + * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} + * types. * * @param jdbcType The {@link java.sql.Types} type. * @param precision The field's numeric precision. @@ -84,11 +86,13 @@ public JdbcFieldInfo(int jdbcType, int precision, int scale) { } /** - * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, nullability, precision, and scale. + * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, nullability, + * precision, and scale. * * @param jdbcType The {@link java.sql.Types} type. * @param nullability The nullability. Must be one of {@link ResultSetMetaData#columnNoNulls}, - * {@link ResultSetMetaData#columnNullable}, or {@link ResultSetMetaData#columnNullableUnknown}. + * {@link ResultSetMetaData#columnNullable}, or {@link + * ResultSetMetaData#columnNullableUnknown}. * @param precision The field's numeric precision. * @param scale The field's numeric scale. */ @@ -103,7 +107,8 @@ public JdbcFieldInfo(int jdbcType, int nullability, int precision, int scale) { } /** - * Builds a JdbcFieldInfo from the corresponding {@link java.sql.ResultSetMetaData} column. + * Builds a JdbcFieldInfo from the corresponding {@link java.sql.ResultSetMetaData} + * column. * * @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from. * @param column The column to get the field information for (on a 1-based index). 
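[Editor's note] The constructor contracts spelled out in the JdbcFieldInfo javadoc above, exercised directly (FieldInfoDemo is an illustrative name; the constructors and getters are the ones shown in these hunks):

```java
// Plain types can use the single-argument constructor; DECIMAL/NUMERIC must
// supply precision and scale or the one-arg constructor throws.
import java.sql.Types;
import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;

class FieldInfoDemo {
  public static void main(String[] args) {
    // Fine: VARCHAR carries no precision/scale requirement.
    JdbcFieldInfo varcharInfo = new JdbcFieldInfo(Types.VARCHAR);

    // DECIMAL(18, 2) needs the three-argument constructor.
    JdbcFieldInfo decimalInfo = new JdbcFieldInfo(Types.DECIMAL, 18, 2);

    System.out.println(varcharInfo.getJdbcType() + " / " + decimalInfo.getPrecision());
  }
}
```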
@@ -113,10 +118,12 @@ public JdbcFieldInfo(int jdbcType, int nullability, int precision, int scale) { */ public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null."); - Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1."); + Preconditions.checkArgument( + column > 0, "ResultSetMetaData columns have indices starting at 1."); Preconditions.checkArgument( column <= rsmd.getColumnCount(), - "The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount()); + "The index must be within the number of columns (1 to %s, inclusive)", + rsmd.getColumnCount()); this.column = column; this.jdbcType = rsmd.getColumnType(column); @@ -128,8 +135,8 @@ public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { } /** - * Builds a JdbcFieldInfo from the corresponding row from a {@link java.sql.DatabaseMetaData#getColumns} - * ResultSet. + * Builds a JdbcFieldInfo from the corresponding row from a {@link + * java.sql.DatabaseMetaData#getColumns} ResultSet. * * @param rs The {@link java.sql.ResultSet} to get the field information from. * @throws SQLException If the column information cannot be retrieved. @@ -144,51 +151,42 @@ public JdbcFieldInfo(ResultSet rs) throws SQLException { this.displaySize = rs.getInt("CHAR_OCTET_LENGTH"); } - /** - * The {@link java.sql.Types} type. - */ + /** The {@link java.sql.Types} type. */ public int getJdbcType() { return jdbcType; } - /** - * The nullability. - */ + /** The nullability. */ public int isNullable() { return nullability; } /** - * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. + * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} + * types. */ public int getPrecision() { return precision; } /** - * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. + * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. */ public int getScale() { return scale; } - /** - * The column index for query column. - */ + /** The column index for query column. */ public int getColumn() { return column; } - /** - * The type name as reported by the database. - */ + /** The type name as reported by the database. */ public String getTypeName() { return typeName; } - /** - * The max number of characters for the column. - */ + /** The max number of characters for the column. */ public int getDisplaySize() { return displaySize; } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java index 2dfc0658cb8d1..fd4721bcd9c4e 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.HashMap; import java.util.Map; - import org.apache.arrow.adapter.jdbc.binder.ColumnBinder; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.VectorSchemaRoot; @@ -29,8 +27,8 @@ /** * A binder binds JDBC prepared statement parameters to rows of Arrow data from a VectorSchemaRoot. * - * Each row of the VectorSchemaRoot will be bound to the configured parameters of the PreparedStatement. - * One row of data is bound at a time. + *
<p>
Each row of the VectorSchemaRoot will be bound to the configured parameters of the + * PreparedStatement. One row of data is bound at a time. */ public class JdbcParameterBinder { private final PreparedStatement statement; @@ -44,8 +42,10 @@ public class JdbcParameterBinder { * * @param statement The statement to bind parameters to. * @param root The VectorSchemaRoot to pull data from. - * @param binders Column binders to translate from Arrow data to JDBC parameters, one per parameter. - * @param parameterIndices For each binder in binders, the index of the parameter to bind to. + * @param binders Column binders to translate from Arrow data to JDBC parameters, one per + * parameter. + * @param parameterIndices For each binder in binders, the index of the parameter to bind + * to. */ private JdbcParameterBinder( final PreparedStatement statement, @@ -55,7 +55,8 @@ private JdbcParameterBinder( Preconditions.checkArgument( binders.length == parameterIndices.length, "Number of column binders (%s) must equal number of parameter indices (%s)", - binders.length, parameterIndices.length); + binders.length, + parameterIndices.length); this.statement = statement; this.root = root; this.binders = binders; @@ -66,9 +67,10 @@ private JdbcParameterBinder( /** * Initialize a binder with a builder. * - * @param statement The statement to bind to. The binder does not maintain ownership of the statement. - * @param root The {@link VectorSchemaRoot} to pull data from. The binder does not maintain ownership - * of the vector schema root. + * @param statement The statement to bind to. The binder does not maintain ownership of the + * statement. + * @param root The {@link VectorSchemaRoot} to pull data from. The binder does not maintain + * ownership of the vector schema root. */ public static Builder builder(final PreparedStatement statement, final VectorSchemaRoot root) { return new Builder(statement, root); @@ -82,8 +84,8 @@ public void reset() { /** * Bind the next row of data to the parameters of the statement. * - * After this, the application should call the desired method on the prepared statement, - * such as {@link PreparedStatement#executeUpdate()}, or {@link PreparedStatement#addBatch()}. + *
<p>
After this, the application should call the desired method on the prepared statement, such + * as {@link PreparedStatement#executeUpdate()}, or {@link PreparedStatement#addBatch()}. * * @return true if a row was bound, false if rows were exhausted */ @@ -99,9 +101,7 @@ public boolean next() throws SQLException { return true; } - /** - * A builder for a {@link JdbcParameterBinder}. - */ + /** A builder for a {@link JdbcParameterBinder}. */ public static class Builder { private final PreparedStatement statement; private final VectorSchemaRoot root; @@ -123,9 +123,7 @@ public Builder bindAll() { /** Bind the given parameter to the given column using the default binder. */ public Builder bind(int parameterIndex, int columnIndex) { - return bind( - parameterIndex, - ColumnBinder.forVector(root.getVector(columnIndex))); + return bind(parameterIndex, ColumnBinder.forVector(root.getVector(columnIndex))); } /** Bind the given parameter using the given binder. */ diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java index 246451b5b22f9..493e53056f945 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; @@ -29,44 +27,32 @@ * *
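[Editor's note: before the JdbcToArrow type-mapping notes resume, a sketch of driving the JdbcParameterBinder reformatted above. The table, columns, and populated `root` are hypothetical; the builder's `build()` is assumed alongside the `builder`/`bindAll`/`next`/`reset` methods shown in this diff.]

```java
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import org.apache.arrow.adapter.jdbc.JdbcParameterBinder;
import org.apache.arrow.vector.VectorSchemaRoot;

class BinderExample {
  // Binds every row of `root` to the INSERT's parameters, one row per batch entry.
  static void insertAll(Connection connection, VectorSchemaRoot root) throws SQLException {
    try (PreparedStatement stmt =
        connection.prepareStatement("INSERT INTO t (a, b) VALUES (?, ?)")) {
      JdbcParameterBinder binder = JdbcParameterBinder.builder(stmt, root).bindAll().build();
      while (binder.next()) { // one row of the VectorSchemaRoot per call
        stmt.addBatch();
      }
      stmt.executeBatch();
      binder.reset(); // rewind if the same rows need to be bound again
    }
  }
}
```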
<p>
This utility uses following data mapping to map JDBC/SQL datatype to Arrow data types. * - *
<p>
CHAR --> ArrowType.Utf8 - * NCHAR --> ArrowType.Utf8 - * VARCHAR --> ArrowType.Utf8 - * NVARCHAR --> ArrowType.Utf8 - * LONGVARCHAR --> ArrowType.Utf8 - * LONGNVARCHAR --> ArrowType.Utf8 - * NUMERIC --> ArrowType.Decimal(precision, scale) - * DECIMAL --> ArrowType.Decimal(precision, scale) - * BIT --> ArrowType.Bool - * TINYINT --> ArrowType.Int(8, signed) - * SMALLINT --> ArrowType.Int(16, signed) - * INTEGER --> ArrowType.Int(32, signed) - * BIGINT --> ArrowType.Int(64, signed) - * REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - * FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - * DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) - * BINARY --> ArrowType.Binary - * VARBINARY --> ArrowType.Binary - * LONGVARBINARY --> ArrowType.Binary - * DATE --> ArrowType.Date(DateUnit.MILLISECOND) - * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) - * TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null) - * CLOB --> ArrowType.Utf8 - * BLOB --> ArrowType.Binary + *
<p>
CHAR --> ArrowType.Utf8 NCHAR --> ArrowType.Utf8 VARCHAR --> ArrowType.Utf8 NVARCHAR --> + * ArrowType.Utf8 LONGVARCHAR --> ArrowType.Utf8 LONGNVARCHAR --> ArrowType.Utf8 NUMERIC --> + * ArrowType.Decimal(precision, scale) DECIMAL --> ArrowType.Decimal(precision, scale) BIT --> + * ArrowType.Bool TINYINT --> ArrowType.Int(8, signed) SMALLINT --> ArrowType.Int(16, signed) + * INTEGER --> ArrowType.Int(32, signed) BIGINT --> ArrowType.Int(64, signed) REAL --> + * ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) FLOAT --> + * ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) DOUBLE --> + * ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) BINARY --> ArrowType.Binary VARBINARY --> + * ArrowType.Binary LONGVARBINARY --> ArrowType.Binary DATE --> ArrowType.Date(DateUnit.MILLISECOND) + * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) TIMESTAMP --> + * ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null) CLOB --> ArrowType.Utf8 BLOB --> + * ArrowType.Binary * * @since 0.10.0 */ public class JdbcToArrow { /*----------------------------------------------------------------* - | | - | Partial Convert API | - | | - *----------------------------------------------------------------*/ + | | + | Partial Convert API | + | | + *----------------------------------------------------------------*/ /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. - * Note here uses the default targetBatchSize = 1024. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. Note here uses the default targetBatchSize = 1024. * * @param resultSet ResultSet to use to fetch the data from underlying database * @param allocator Memory allocator @@ -74,28 +60,25 @@ public class JdbcToArrow { * @throws SQLException on error */ public static ArrowVectorIterator sqlToArrowVectorIterator( - ResultSet resultSet, - BufferAllocator allocator) - throws SQLException, IOException { + ResultSet resultSet, BufferAllocator allocator) throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); - JdbcToArrowConfig config = - new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); + JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); return sqlToArrowVectorIterator(resultSet, config); } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. - * Note if not specify {@link JdbcToArrowConfig#targetBatchSize}, will use default value 1024. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. Note if not specify {@link JdbcToArrowConfig#targetBatchSize}, will use default value + * 1024. + * * @param resultSet ResultSet to use to fetch the data from underlying database - * @param config Configuration of the conversion from JDBC to Arrow. + * @param config Configuration of the conversion from JDBC to Arrow. 
* @return Arrow Data Objects {@link ArrowVectorIterator} * @throws SQLException on error */ public static ArrowVectorIterator sqlToArrowVectorIterator( - ResultSet resultSet, - JdbcToArrowConfig config) - throws SQLException, IOException { + ResultSet resultSet, JdbcToArrowConfig config) throws SQLException, IOException { Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); Preconditions.checkNotNull(config, "The configuration cannot be null"); return ArrowVectorIterator.create(resultSet, config); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java index 68851f4a98bc9..1bfcfc8fe00aa 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.math.RoundingMode; import java.util.Calendar; import java.util.Map; import java.util.function.Function; - import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; @@ -30,25 +28,23 @@ /** * This class configures the JDBC-to-Arrow conversion process. - *
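[Editor's note: a sketch of the partial-convert API whose hunks appear above, assuming `resultSet` comes from an executed JDBC query; the `process` consumer is hypothetical. With the default configuration each `next()` yields a batch of at most 1024 rows, and the caller closes each root.]

```java
import java.sql.ResultSet;
import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;

class IteratorExample {
  static void readAll(ResultSet resultSet) throws Exception {
    try (BufferAllocator allocator = new RootAllocator();
        ArrowVectorIterator batches =
            JdbcToArrow.sqlToArrowVectorIterator(resultSet, allocator)) {
      while (batches.hasNext()) {
        try (VectorSchemaRoot batch = batches.next()) {
          process(batch); // hypothetical consumer of one Arrow batch
        }
      }
    }
  }

  static void process(VectorSchemaRoot batch) {
    System.out.println(batch.getRowCount());
  }
}
```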
<p>
- * The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, - * and the calendar is used to define the time zone of any - * {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp} - * fields that are created during the conversion. Neither field may be null. - *
<p>
- *
<p>
- * If the includeMetadata flag is set, the Arrow field metadata will contain information - * from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the - * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding - * {@link org.apache.arrow.vector.FieldVector}. - *
<p>
- *
<p>
- * If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding - * {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type - * information cannot be retrieved from all JDBC implementations (H2 for example, returns - * {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index - * or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion. - *
<p>
+ * + *
<p>
The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, and + * the calendar is used to define the time zone of any {@link + * org.apache.arrow.vector.types.pojo.ArrowType.Timestamp} fields that are created during the + * conversion. Neither field may be null. + * + *
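[Editor's note: to make the allocator/calendar contract concrete, a minimal sketch of building a config from the two required pieces; the builder and setters it uses appear later in this diff.]

```java
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.RootAllocator;

class ConfigExample {
  static JdbcToArrowConfig utcConfig() {
    // Neither the allocator nor the calendar may be null.
    return new JdbcToArrowConfigBuilder(new RootAllocator(), JdbcToArrowUtils.getUtcCalendar())
        .setIncludeMetadata(true) // copy JDBC metadata onto the Arrow fields
        .build();
  }
}
```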
<p>
If the includeMetadata flag is set, the Arrow field metadata will contain + * information from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the + * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding {@link + * org.apache.arrow.vector.FieldVector}. + * + *
<p>
If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the + * corresponding {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, + * the sub-type information cannot be retrieved from all JDBC implementations (H2 for example, + * returns {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The + * column index or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the + * conversion. */ public final class JdbcToArrowConfig { @@ -66,14 +62,12 @@ public final class JdbcToArrowConfig { private final Map> columnMetadataByColumnIndex; private final RoundingMode bigDecimalRoundingMode; /** - * The maximum rowCount to read each time when partially convert data. - * Default value is 1024 and -1 means disable partial read. - * default is -1 which means disable partial read. - * Note that this flag only useful for {@link JdbcToArrow#sqlToArrowVectorIterator} - * 1) if targetBatchSize != -1, it will convert full data into multiple vectors - * with valueCount no more than targetBatchSize. - * 2) if targetBatchSize == -1, it will convert full data into a single vector in {@link ArrowVectorIterator} - *
<p>
+ * The maximum rowCount to read each time when partially convert data. Default value is 1024 and + * -1 means disable partial read. default is -1 which means disable partial read. Note that this + * flag only useful for {@link JdbcToArrow#sqlToArrowVectorIterator} 1) if targetBatchSize != -1, + * it will convert full data into multiple vectors with valueCount no more than targetBatchSize. + * 2) if targetBatchSize == -1, it will convert full data into a single vector in {@link + * ArrowVectorIterator} */ private final int targetBatchSize; @@ -81,81 +75,100 @@ public final class JdbcToArrowConfig { private final JdbcConsumerFactory jdbcConsumerGetter; /** - * Constructs a new configuration from the provided allocator and calendar. The allocator - * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define - * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet. + * Constructs a new configuration from the provided allocator and calendar. The allocator + * is used when constructing the Arrow vectors from the ResultSet, and the calendar is + * used to define Arrow Timestamp fields, and to read time-based fields from the JDBC + * ResultSet. * - * @param allocator The memory allocator to construct the Arrow vectors with. - * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param calendar The calendar to use when constructing Timestamp fields and reading time-based + * results. */ JdbcToArrowConfig(BufferAllocator allocator, Calendar calendar) { - this(allocator, calendar, + this( + allocator, + calendar, /* include metadata */ false, /* reuse vector schema root */ false, /* array sub-types by column index */ null, /* array sub-types by column name */ null, - DEFAULT_TARGET_BATCH_SIZE, null, null); + DEFAULT_TARGET_BATCH_SIZE, + null, + null); } JdbcToArrowConfig( - BufferAllocator allocator, - Calendar calendar, - boolean includeMetadata, - boolean reuseVectorSchemaRoot, - Map arraySubTypesByColumnIndex, - Map arraySubTypesByColumnName, - int targetBatchSize, - Function jdbcToArrowTypeConverter) { - this(allocator, calendar, includeMetadata, reuseVectorSchemaRoot, arraySubTypesByColumnIndex, - arraySubTypesByColumnName, targetBatchSize, jdbcToArrowTypeConverter, null); + BufferAllocator allocator, + Calendar calendar, + boolean includeMetadata, + boolean reuseVectorSchemaRoot, + Map arraySubTypesByColumnIndex, + Map arraySubTypesByColumnName, + int targetBatchSize, + Function jdbcToArrowTypeConverter) { + this( + allocator, + calendar, + includeMetadata, + reuseVectorSchemaRoot, + arraySubTypesByColumnIndex, + arraySubTypesByColumnName, + targetBatchSize, + jdbcToArrowTypeConverter, + null); } /** - * Constructs a new configuration from the provided allocator and calendar. The allocator - * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define - * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet. + * Constructs a new configuration from the provided allocator and calendar. The allocator + * is used when constructing the Arrow vectors from the ResultSet, and the calendar is + * used to define Arrow Timestamp fields, and to read time-based fields from the JDBC + * ResultSet. * - * @param allocator The memory allocator to construct the Arrow vectors with. 
- * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. - * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata. + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param calendar The calendar to use when constructing Timestamp fields and reading time-based + * results. + * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field + * metadata. * @param reuseVectorSchemaRoot Whether to reuse the vector schema root for each data load. * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based). - * @param arraySubTypesByColumnName The type of the JDBC array at the column name. - * @param targetBatchSize The target batch size to be used in preallocation of the resulting vectors. - * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to arrow type. If set to null, - * the default mapping will be used, which is defined as: - *
- * <ul>
- *   <li>CHAR --> ArrowType.Utf8</li>
- *   <li>NCHAR --> ArrowType.Utf8</li>
- *   <li>VARCHAR --> ArrowType.Utf8</li>
- *   <li>NVARCHAR --> ArrowType.Utf8</li>
- *   <li>LONGVARCHAR --> ArrowType.Utf8</li>
- *   <li>LONGNVARCHAR --> ArrowType.Utf8</li>
- *   <li>NUMERIC --> ArrowType.Decimal(precision, scale)</li>
- *   <li>DECIMAL --> ArrowType.Decimal(precision, scale)</li>
- *   <li>BIT --> ArrowType.Bool</li>
- *   <li>TINYINT --> ArrowType.Int(8, signed)</li>
- *   <li>SMALLINT --> ArrowType.Int(16, signed)</li>
- *   <li>INTEGER --> ArrowType.Int(32, signed)</li>
- *   <li>BIGINT --> ArrowType.Int(64, signed)</li>
- *   <li>REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
- *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
- *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)</li>
- *   <li>BINARY --> ArrowType.Binary</li>
- *   <li>VARBINARY --> ArrowType.Binary</li>
- *   <li>LONGVARBINARY --> ArrowType.Binary</li>
- *   <li>DATE --> ArrowType.Date(DateUnit.DAY)</li>
- *   <li>TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)</li>
- *   <li>TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)</li>
- *   <li>CLOB --> ArrowType.Utf8</li>
- *   <li>BLOB --> ArrowType.Binary</li>
- *   <li>ARRAY --> ArrowType.List</li>
- *   <li>STRUCT --> ArrowType.Struct</li>
- *   <li>NULL --> ArrowType.Null</li>
- * </ul>
- * @param bigDecimalRoundingMode The java.math.RoundingMode to be used in coercion of a BigDecimal from a - * ResultSet having a scale which does not match that of the target vector. Use null - * (default value) to require strict scale matching. + * @param arraySubTypesByColumnName The type of the JDBC array at the column name. + * @param targetBatchSize The target batch size to be used in preallocation of the resulting + * vectors. + * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to arrow + * type. If set to null, the default mapping will be used, which is defined as: + *
+ * <ul>
+ *   <li>CHAR --> ArrowType.Utf8
+ *   <li>NCHAR --> ArrowType.Utf8
+ *   <li>VARCHAR --> ArrowType.Utf8
+ *   <li>NVARCHAR --> ArrowType.Utf8
+ *   <li>LONGVARCHAR --> ArrowType.Utf8
+ *   <li>LONGNVARCHAR --> ArrowType.Utf8
+ *   <li>NUMERIC --> ArrowType.Decimal(precision, scale)
+ *   <li>DECIMAL --> ArrowType.Decimal(precision, scale)
+ *   <li>BIT --> ArrowType.Bool
+ *   <li>TINYINT --> ArrowType.Int(8, signed)
+ *   <li>SMALLINT --> ArrowType.Int(16, signed)
+ *   <li>INTEGER --> ArrowType.Int(32, signed)
+ *   <li>BIGINT --> ArrowType.Int(64, signed)
+ *   <li>REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
+ *   <li>BINARY --> ArrowType.Binary
+ *   <li>VARBINARY --> ArrowType.Binary
+ *   <li>LONGVARBINARY --> ArrowType.Binary
+ *   <li>DATE --> ArrowType.Date(DateUnit.DAY)
+ *   <li>TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
+ *   <li>TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)
+ *   <li>CLOB --> ArrowType.Utf8
+ *   <li>BLOB --> ArrowType.Binary
+ *   <li>ARRAY --> ArrowType.List
+ *   <li>STRUCT --> ArrowType.Struct
+ *   <li>NULL --> ArrowType.Null
+ * </ul>
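[Editor's note: since the list above documents the default mapping, a hedged sketch of overriding it through the type-converter hook; the OTHER-to-Utf8 fallback is illustrative only, not part of the library.]

```java
import java.sql.Types;
import java.util.function.Function;
import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.types.pojo.ArrowType;

class ConverterExample {
  static JdbcToArrowConfig withTextFallback() {
    Function<JdbcFieldInfo, ArrowType> converter =
        fieldInfo -> {
          if (fieldInfo.getJdbcType() == Types.OTHER) {
            return new ArrowType.Utf8(); // treat driver-specific types as text
          }
          // Delegate everything else to the default mapping.
          return JdbcToArrowUtils.getArrowTypeFromJdbcType(
              fieldInfo, JdbcToArrowUtils.getUtcCalendar());
        };
    return new JdbcToArrowConfigBuilder(new RootAllocator(), JdbcToArrowUtils.getUtcCalendar())
        .setJdbcToArrowTypeConverter(converter)
        .build();
  }
}
```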
+ * + * @param bigDecimalRoundingMode The java.math.RoundingMode to be used in coercion of a BigDecimal + * from a ResultSet having a scale which does not match that of the target vector. Use null + * (default value) to require strict scale matching. */ JdbcToArrowConfig( BufferAllocator allocator, @@ -245,16 +258,19 @@ public final class JdbcToArrowConfig { this.bigDecimalRoundingMode = bigDecimalRoundingMode; // set up type converter - this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter != null ? jdbcToArrowTypeConverter : - (jdbcFieldInfo) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(jdbcFieldInfo, calendar); + this.jdbcToArrowTypeConverter = + jdbcToArrowTypeConverter != null + ? jdbcToArrowTypeConverter + : (jdbcFieldInfo) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(jdbcFieldInfo, calendar); - this.jdbcConsumerGetter = jdbcConsumerGetter != null ? jdbcConsumerGetter : JdbcToArrowUtils::getConsumer; + this.jdbcConsumerGetter = + jdbcConsumerGetter != null ? jdbcConsumerGetter : JdbcToArrowUtils::getConsumer; } /** - * The calendar to use when defining Arrow Timestamp fields - * and retrieving {@link java.sql.Date}, {@link java.sql.Time}, or {@link java.sql.Timestamp} - * data types from the {@link java.sql.ResultSet}, or null if not converting. + * The calendar to use when defining Arrow Timestamp fields and retrieving {@link java.sql.Date}, + * {@link java.sql.Time}, or {@link java.sql.Timestamp} data types from the {@link + * java.sql.ResultSet}, or null if not converting. * * @return the calendar. */ @@ -280,30 +296,22 @@ public boolean shouldIncludeMetadata() { return includeMetadata; } - /** - * Get the target batch size for partial read. - */ + /** Get the target batch size for partial read. */ public int getTargetBatchSize() { return targetBatchSize; } - /** - * Get whether it is allowed to reuse the vector schema root. - */ + /** Get whether it is allowed to reuse the vector schema root. */ public boolean isReuseVectorSchemaRoot() { return reuseVectorSchemaRoot; } - /** - * Gets the mapping between JDBC type information to Arrow type. - */ + /** Gets the mapping between JDBC type information to Arrow type. */ public Function getJdbcToArrowTypeConverter() { return jdbcToArrowTypeConverter; } - /** - * Gets the JDBC consumer getter. - */ + /** Gets the JDBC consumer getter. */ public JdbcConsumerFactory getJdbcConsumerGetter() { return jdbcConsumerGetter; } @@ -311,8 +319,10 @@ public JdbcConsumerFactory getJdbcConsumerGetter() { /** * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index. * - * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type. - * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined. + * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link + * java.sql.Types#ARRAY} type. + * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not + * defined. */ public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { if (arraySubTypesByColumnIndex == null) { @@ -325,8 +335,10 @@ public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { /** * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name. * - * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type. - * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined. 
+ * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link + * java.sql.Types#ARRAY} type. + * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not + * defined. */ public JdbcFieldInfo getArraySubTypeByColumnName(String name) { if (arraySubTypesByColumnName == null) { @@ -339,7 +351,8 @@ public JdbcFieldInfo getArraySubTypeByColumnName(String name) { /** * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column index. * - * @param index The {@link java.sql.ResultSetMetaData} column index to evaluate for explicit type mapping. + * @param index The {@link java.sql.ResultSetMetaData} column index to evaluate for explicit type + * mapping. * @return The {@link JdbcFieldInfo} defined for the column, or null if not defined. */ public JdbcFieldInfo getExplicitTypeByColumnIndex(int index) { @@ -353,7 +366,8 @@ public JdbcFieldInfo getExplicitTypeByColumnIndex(int index) { /** * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column name. * - * @param name The {@link java.sql.ResultSetMetaData} column name to evaluate for explicit type mapping. + * @param name The {@link java.sql.ResultSetMetaData} column name to evaluate for explicit type + * mapping. * @return The {@link JdbcFieldInfo} defined for the column, or null if not defined. */ public JdbcFieldInfo getExplicitTypeByColumnName(String name) { @@ -364,17 +378,12 @@ public JdbcFieldInfo getExplicitTypeByColumnName(String name) { } } - /** - * Return schema level metadata or null if not provided. - */ + /** Return schema level metadata or null if not provided. */ public Map getSchemaMetadata() { return schemaMetadata; } - /** - * Return metadata from columnIndex->meta map on per field basis - * or null if not provided. - */ + /** Return metadata from columnIndex->meta map on per field basis or null if not provided. */ public Map> getColumnMetadataByColumnIndex() { return columnMetadataByColumnIndex; } @@ -383,12 +392,14 @@ public RoundingMode getBigDecimalRoundingMode() { return bigDecimalRoundingMode; } - /** - * Interface for a function that gets a JDBC consumer for the given values. - */ + /** Interface for a function that gets a JDBC consumer for the given values. */ @FunctionalInterface public interface JdbcConsumerFactory { - JdbcConsumer apply(ArrowType arrowType, int columnIndex, boolean nullable, FieldVector vector, - JdbcToArrowConfig config); + JdbcConsumer apply( + ArrowType arrowType, + int columnIndex, + boolean nullable, + FieldVector vector, + JdbcToArrowConfig config); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java index 7d88c23832067..783a373c6d0a7 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import static org.apache.arrow.adapter.jdbc.JdbcToArrowConfig.DEFAULT_TARGET_BATCH_SIZE; @@ -23,15 +22,12 @@ import java.util.Calendar; import java.util.Map; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.types.pojo.ArrowType; -/** - * This class builds {@link JdbcToArrowConfig}s. - */ +/** This class builds {@link JdbcToArrowConfig}s. */ public class JdbcToArrowConfigBuilder { private Calendar calendar; private BufferAllocator allocator; @@ -49,9 +45,9 @@ public class JdbcToArrowConfigBuilder { private RoundingMode bigDecimalRoundingMode; /** - * Default constructor for the JdbcToArrowConfigBuilder}. - * Use the setter methods for the allocator and calendar; the allocator must be - * set. Otherwise, {@link #build()} will throw a {@link NullPointerException}. + * Default constructor for the JdbcToArrowConfigBuilder}. Use the setter methods for + * the allocator and calendar; the allocator must be set. Otherwise, {@link #build()} will throw a + * {@link NullPointerException}. */ public JdbcToArrowConfigBuilder() { this.allocator = null; @@ -68,16 +64,13 @@ public JdbcToArrowConfigBuilder() { } /** - * Constructor for the JdbcToArrowConfigBuilder. The - * allocator is required, and a {@link NullPointerException} - * will be thrown if it is null. - *
<p>
- * The allocator is used to construct Arrow vectors from the JDBC ResultSet. - * The calendar is used to determine the time zone of {@link java.sql.Timestamp} - * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and - * {@link java.sql.Timestamp} fields to a single, common time zone when reading - * from the result set. - *
<p>
+ * Constructor for the JdbcToArrowConfigBuilder. The allocator is required, and a + * {@link NullPointerException} will be thrown if it is null. + * + *
<p>
The allocator is used to construct Arrow vectors from the JDBC ResultSet. The calendar is + * used to determine the time zone of {@link java.sql.Timestamp} fields and convert {@link + * java.sql.Date}, {@link java.sql.Time}, and {@link java.sql.Timestamp} fields to a single, + * common time zone when reading from the result set. * * @param allocator The Arrow Vector memory allocator. * @param calendar The calendar to use when constructing timestamp fields. @@ -95,26 +88,23 @@ public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar) { } /** - * Constructor for the JdbcToArrowConfigBuilder. Both the - * allocator and calendar are required. A {@link NullPointerException} - * will be thrown if either of those arguments is null. - *
<p>
- * The allocator is used to construct Arrow vectors from the JDBC ResultSet. - * The calendar is used to determine the time zone of {@link java.sql.Timestamp} - * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and - * {@link java.sql.Timestamp} fields to a single, common time zone when reading - * from the result set. - *
<p>
- *
<p>
- * The includeMetadata argument, if true will cause - * various information about each database field to be added to the Vector - * Schema's field metadata. - *
<p>
+ * Constructor for the JdbcToArrowConfigBuilder. Both the allocator and calendar are + * required. A {@link NullPointerException} will be thrown if either of those arguments is + * null. + * + *
<p>
The allocator is used to construct Arrow vectors from the JDBC ResultSet. The calendar is + * used to determine the time zone of {@link java.sql.Timestamp} fields and convert {@link + * java.sql.Date}, {@link java.sql.Time}, and {@link java.sql.Timestamp} fields to a single, + * common time zone when reading from the result set. + * + *
<p>
The includeMetadata argument, if true will cause various + * information about each database field to be added to the Vector Schema's field metadata. * * @param allocator The Arrow Vector memory allocator. * @param calendar The calendar to use when constructing timestamp fields. */ - public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar, boolean includeMetadata) { + public JdbcToArrowConfigBuilder( + BufferAllocator allocator, Calendar calendar, boolean includeMetadata) { this(allocator, calendar); this.includeMetadata = includeMetadata; } @@ -132,8 +122,8 @@ public JdbcToArrowConfigBuilder setAllocator(BufferAllocator allocator) { } /** - * Sets the {@link Calendar} to use when constructing timestamp fields in the - * Arrow schema, and reading time-based fields from the JDBC ResultSet. + * Sets the {@link Calendar} to use when constructing timestamp fields in the Arrow schema, and + * reading time-based fields from the JDBC ResultSet. * * @param calendar the calendar to set. */ @@ -145,7 +135,8 @@ public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) { /** * Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. * - * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata. + * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field + * metadata. * @return This instance of the JdbcToArrowConfig, for chaining. */ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { @@ -154,8 +145,8 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { } /** - * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. - * The column index is 1-based, to match the JDBC column index. + * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link + * java.sql.Types#ARRAY}. The column index is 1-based, to match the JDBC column index. * * @param map The mapping. * @return This instance of the JdbcToArrowConfig, for chaining. @@ -166,7 +157,8 @@ public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(MapJdbcToArrowConfig, for chaining. @@ -178,11 +170,12 @@ public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map - * This can be useful to override type information from JDBC drivers that provide incomplete type info, - * e.g. DECIMAL with precision = scale = 0. - *
<p>
- * The column index is 1-based, to match the JDBC column index. + * + *
<p>
This can be useful to override type information from JDBC drivers that provide incomplete + * type info, e.g. DECIMAL with precision = scale = 0. + * + *
<p>
The column index is 1-based, to match the JDBC column index. + * * @param map The mapping. */ public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map map) { @@ -192,9 +185,10 @@ public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map - * This can be useful to override type information from JDBC drivers that provide incomplete type info, - * e.g. DECIMAL with precision = scale = 0. + * + *
<p>
This can be useful to override type information from JDBC drivers that provide incomplete + * type info, e.g. DECIMAL with precision = scale = 0. + * * @param map The mapping. */ public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map map) { @@ -204,8 +198,8 @@ public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map - * Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once. + * + *
<p>
Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once. */ public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { this.targetBatchSize = targetBatchSize; @@ -214,8 +208,9 @@ public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { /** * Set the function used to convert JDBC types to Arrow types. - *
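[Editor's note: the setTargetBatchSize call completed just above pairs naturally with NO_LIMIT_BATCH_SIZE; a minimal sketch, using only names that appear in this diff.]

```java
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.RootAllocator;

class BatchSizeExample {
  static JdbcToArrowConfig singleBatchConfig() {
    // Read the whole ResultSet into one VectorSchemaRoot instead of 1024-row batches.
    return new JdbcToArrowConfigBuilder(new RootAllocator(), JdbcToArrowUtils.getUtcCalendar())
        .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE)
        .build();
  }
}
```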
<p>
- * Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo, Calendar)}. + * + *
<p>
Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo, + * Calendar)}. */ public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter( Function jdbcToArrowTypeConverter) { @@ -225,9 +220,9 @@ public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter( /** * Set the function used to get a JDBC consumer for a given type. - *
<p>
- * Defaults to wrapping {@link - * JdbcToArrowUtils#getConsumer(ArrowType, Integer, Boolean, FieldVector, JdbcToArrowConfig)}. + * + *
<p>
Defaults to wrapping {@link JdbcToArrowUtils#getConsumer(ArrowType, Integer, Boolean, + * FieldVector, JdbcToArrowConfig)}. */ public JdbcToArrowConfigBuilder setJdbcConsumerGetter( JdbcToArrowConfig.JdbcConsumerFactory jdbcConsumerGetter) { @@ -236,35 +231,32 @@ public JdbcToArrowConfigBuilder setJdbcConsumerGetter( } /** - * Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each iteration, - * or to allocate a new one. + * Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each + * iteration, or to allocate a new one. */ public JdbcToArrowConfigBuilder setReuseVectorSchemaRoot(boolean reuseVectorSchemaRoot) { this.reuseVectorSchemaRoot = reuseVectorSchemaRoot; return this; } - /** - * Set metadata for schema. - */ + /** Set metadata for schema. */ public JdbcToArrowConfigBuilder setSchemaMetadata(Map schemaMetadata) { this.schemaMetadata = schemaMetadata; return this; } - /** - * Set metadata from columnIndex->meta map on per field basis. - */ + /** Set metadata from columnIndex->meta map on per field basis. */ public JdbcToArrowConfigBuilder setColumnMetadataByColumnIndex( - Map> columnMetadataByColumnIndex) { + Map> columnMetadataByColumnIndex) { this.columnMetadataByColumnIndex = columnMetadataByColumnIndex; return this; } /** - * Set the rounding mode used when the scale of the actual value does not match the declared scale. - *
<p>
- * By default, an error is raised in such cases. + * Set the rounding mode used when the scale of the actual value does not match the declared + * scale. + * + *
<p>
By default, an error is raised in such cases. */ public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecimalRoundingMode) { this.bigDecimalRoundingMode = bigDecimalRoundingMode; @@ -272,8 +264,8 @@ public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecima } /** - * This builds the {@link JdbcToArrowConfig} from the provided - * {@link BufferAllocator} and {@link Calendar}. + * This builds the {@link JdbcToArrowConfig} from the provided {@link BufferAllocator} and {@link + * Calendar}. * * @return The built {@link JdbcToArrowConfig} * @throws NullPointerException if either the allocator or calendar was not set. diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java index eaee49936079f..8397d4c9e0dc4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; @@ -38,7 +37,6 @@ import java.util.Locale; import java.util.Map; import java.util.TimeZone; - import org.apache.arrow.adapter.jdbc.consumer.ArrayConsumer; import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer; import org.apache.arrow.adapter.jdbc.consumer.BinaryConsumer; @@ -91,7 +89,8 @@ import org.apache.arrow.vector.util.ValueVectorUtility; /** - * Class that does most of the work to convert JDBC ResultSet data into Arrow columnar format Vector objects. + * Class that does most of the work to convert JDBC ResultSet data into Arrow columnar format Vector + * objects. * * @since 0.10.0 */ @@ -99,9 +98,7 @@ public class JdbcToArrowUtils { private static final int JDBC_ARRAY_VALUE_COLUMN = 2; - /** - * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. - */ + /** Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. */ public static Calendar getUtcCalendar() { return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); } @@ -114,7 +111,8 @@ public static Calendar getUtcCalendar() { * @return {@link Schema} * @throws SQLException on error */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException { + public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) + throws SQLException { Preconditions.checkNotNull(calendar, "Calendar object can't be null"); return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); @@ -123,25 +121,28 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar /** * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. * - * @param parameterMetaData The ResultSetMetaData containing the results, to read the JDBC metadata from. - * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. + * @param parameterMetaData The ResultSetMetaData containing the results, to read the JDBC + * metadata from. + * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. 
* @return {@link Schema} * @throws SQLException on error */ - public static Schema jdbcToArrowSchema(final ParameterMetaData parameterMetaData, final Calendar calendar) - throws SQLException { + public static Schema jdbcToArrowSchema( + final ParameterMetaData parameterMetaData, final Calendar calendar) throws SQLException { Preconditions.checkNotNull(calendar, "Calendar object can't be null"); Preconditions.checkNotNull(parameterMetaData); final List parameterFields = new ArrayList<>(parameterMetaData.getParameterCount()); - for (int parameterCounter = 1; parameterCounter <= parameterMetaData.getParameterCount(); - parameterCounter++) { + for (int parameterCounter = 1; + parameterCounter <= parameterMetaData.getParameterCount(); + parameterCounter++) { final int jdbcDataType = parameterMetaData.getParameterType(parameterCounter); final int jdbcIsNullable = parameterMetaData.isNullable(parameterCounter); final boolean arrowIsNullable = jdbcIsNullable != ParameterMetaData.parameterNoNulls; final int precision = parameterMetaData.getPrecision(parameterCounter); final int scale = parameterMetaData.getScale(parameterCounter); - final ArrowType arrowType = getArrowTypeFromJdbcType(new JdbcFieldInfo(jdbcDataType, precision, scale), calendar); - final FieldType fieldType = new FieldType(arrowIsNullable, arrowType, /*dictionary=*/null); + final ArrowType arrowType = + getArrowTypeFromJdbcType(new JdbcFieldInfo(jdbcDataType, precision, scale), calendar); + final FieldType fieldType = new FieldType(arrowIsNullable, arrowType, /*dictionary=*/ null); parameterFields.add(new Field(null, fieldType, null)); } @@ -152,10 +153,11 @@ public static Schema jdbcToArrowSchema(final ParameterMetaData parameterMetaData * Converts the provided JDBC type to its respective {@link ArrowType} counterpart. * * @param fieldInfo the {@link JdbcFieldInfo} with information about the original JDBC type. - * @param calendar the {@link Calendar} to use for datetime data types. + * @param calendar the {@link Calendar} to use for datetime data types. * @return a new {@link ArrowType}. */ - public static ArrowType getArrowTypeFromJdbcType(final JdbcFieldInfo fieldInfo, final Calendar calendar) { + public static ArrowType getArrowTypeFromJdbcType( + final JdbcFieldInfo fieldInfo, final Calendar calendar) { switch (fieldInfo.getJdbcType()) { case Types.BOOLEAN: case Types.BIT: @@ -222,30 +224,34 @@ public static ArrowType getArrowTypeFromJdbcType(final JdbcFieldInfo fieldInfo, /** * Create Arrow {@link Schema} object for the given JDBC {@link java.sql.ResultSetMetaData}. * - *
<p>
- * If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns true, the following fields - * will be added to the {@link FieldType#getMetadata()}: + *
<p>
If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns true, the + * following fields will be added to the {@link FieldType#getMetadata()}: + * *
 * <ul>
- *   <li>{@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}</li>
- *   <li>{@link Constants#SQL_TABLE_NAME_KEY} representing {@link ResultSetMetaData#getTableName(int)}</li>
- *   <li>{@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnLabel(int)}</li>
- *   <li>{@link Constants#SQL_TYPE_KEY} representing {@link ResultSetMetaData#getColumnTypeName(int)}</li>
+ *   <li>{@link Constants#SQL_CATALOG_NAME_KEY} representing {@link
+ *       ResultSetMetaData#getCatalogName(int)}
+ *   <li>{@link Constants#SQL_TABLE_NAME_KEY} representing {@link
+ *       ResultSetMetaData#getTableName(int)}
+ *   <li>{@link Constants#SQL_COLUMN_NAME_KEY} representing {@link
+ *       ResultSetMetaData#getColumnLabel(int)}
+ *   <li>{@link Constants#SQL_TYPE_KEY} representing {@link
+ *       ResultSetMetaData#getColumnTypeName(int)}
 * </ul>
- * <p>
- * <p>
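[Editor's note: a sketch of consuming that field-level metadata after conversion, assuming the schema was built with includeMetadata enabled; the keys are the Constants shown near the top of this diff.]

```java
import java.util.Map;
import org.apache.arrow.adapter.jdbc.Constants;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;

class MetadataExample {
  static void printSqlOrigins(Schema schema) {
    for (Field field : schema.getFields()) {
      Map<String, String> meta = field.getMetadata();
      System.out.printf(
          "%s: table=%s, sqlType=%s%n",
          field.getName(),
          meta.get(Constants.SQL_TABLE_NAME_KEY),
          meta.get(Constants.SQL_TYPE_KEY));
    }
  }
}
```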
- * If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be used to look up - * the array sub-type field. The {@link JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be - * checked first, followed by the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method. - *
<p>
+ * + *
<p>
If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be + * used to look up the array sub-type field. The {@link + * JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be checked first, followed by + * the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method. * * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. * @param config The configuration to use when constructing the schema. * @return {@link Schema} * @throws SQLException on error - * @throws IllegalArgumentException if rsmd contains an {@link java.sql.Types#ARRAY} but the - * config does not have a sub-type definition for it. + * @throws IllegalArgumentException if rsmd contains an {@link java.sql.Types#ARRAY} + * but the config does not have a sub-type definition for it. */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException { + public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) + throws SQLException { Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); Preconditions.checkNotNull(config, "The configuration object must not be null"); @@ -254,8 +260,10 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig for (int i = 1; i <= columnCount; i++) { final String columnName = rsmd.getColumnLabel(i); - final Map columnMetadata = config.getColumnMetadataByColumnIndex() != null ? - config.getColumnMetadataByColumnIndex().get(i) : null; + final Map columnMetadata = + config.getColumnMetadataByColumnIndex() != null + ? config.getColumnMetadataByColumnIndex().get(i) + : null; final Map metadata; if (config.shouldIncludeMetadata()) { metadata = new HashMap<>(); @@ -278,14 +286,19 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); if (arrowType != null) { - final FieldType fieldType = new FieldType( - isColumnNullable(rsmd, i, columnFieldInfo), arrowType, /* dictionary encoding */ null, metadata); + final FieldType fieldType = + new FieldType( + isColumnNullable(rsmd, i, columnFieldInfo), + arrowType, /* dictionary encoding */ + null, + metadata); List children = null; if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) { final JdbcFieldInfo arrayFieldInfo = getJdbcFieldInfoForArraySubType(rsmd, i, config); if (arrayFieldInfo == null) { - throw new IllegalArgumentException("Configuration does not provide a mapping for array column " + i); + throw new IllegalArgumentException( + "Configuration does not provide a mapping for array column " + i); } children = new ArrayList(); final ArrowType childType = config.getJdbcToArrowTypeConverter().apply(arrayFieldInfo); @@ -295,9 +308,13 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig FieldType keyType = new FieldType(false, new ArrowType.Utf8(), null, null); FieldType valueType = new FieldType(false, new ArrowType.Utf8(), null, null); children = new ArrayList<>(); - children.add(new Field("child", mapType, - Arrays.asList(new Field(MapVector.KEY_NAME, keyType, null), - new Field(MapVector.VALUE_NAME, valueType, null)))); + children.add( + new Field( + "child", + mapType, + Arrays.asList( + new Field(MapVector.KEY_NAME, keyType, null), + new Field(MapVector.VALUE_NAME, valueType, null)))); } fields.add(new 
Field(columnName, fieldType, children)); @@ -307,18 +324,14 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig } static JdbcFieldInfo getJdbcFieldInfoForColumn( - ResultSetMetaData rsmd, - int arrayColumn, - JdbcToArrowConfig config) - throws SQLException { + ResultSetMetaData rsmd, int arrayColumn, JdbcToArrowConfig config) throws SQLException { Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); Preconditions.checkNotNull(config, "Configuration must not be null"); Preconditions.checkArgument( - arrayColumn > 0, - "ResultSetMetaData columns start with 1; column cannot be less than 1"); + arrayColumn > 0, "ResultSetMetaData columns start with 1; column cannot be less than 1"); Preconditions.checkArgument( - arrayColumn <= rsmd.getColumnCount(), - "Column number cannot be more than the number of columns"); + arrayColumn <= rsmd.getColumnCount(), + "Column number cannot be more than the number of columns"); JdbcFieldInfo fieldInfo = config.getExplicitTypeByColumnIndex(arrayColumn); if (fieldInfo == null) { @@ -334,16 +347,12 @@ static JdbcFieldInfo getJdbcFieldInfoForColumn( * If no sub-type can be found, returns null. */ private static JdbcFieldInfo getJdbcFieldInfoForArraySubType( - ResultSetMetaData rsmd, - int arrayColumn, - JdbcToArrowConfig config) - throws SQLException { + ResultSetMetaData rsmd, int arrayColumn, JdbcToArrowConfig config) throws SQLException { Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); Preconditions.checkNotNull(config, "Configuration must not be null"); Preconditions.checkArgument( - arrayColumn > 0, - "ResultSetMetaData columns start with 1; column cannot be less than 1"); + arrayColumn > 0, "ResultSetMetaData columns start with 1; column cannot be less than 1"); Preconditions.checkArgument( arrayColumn <= rsmd.getColumnCount(), "Column number cannot be more than the number of columns"); @@ -359,10 +368,10 @@ private static JdbcFieldInfo getJdbcFieldInfoForArraySubType( * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate * the given Arrow Vector objects. * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate - * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link Timestamp} - * data types from the {@link ResultSet}, or null if not converting. + * @param rs ResultSet to use to fetch the data from underlying database + * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link + * Timestamp} data types from the {@link ResultSet}, or null if not converting. 
* @throws SQLException on error */ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar) @@ -373,29 +382,30 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); } - static boolean isColumnNullable(ResultSetMetaData resultSetMetadata, int index, JdbcFieldInfo info) - throws SQLException { + static boolean isColumnNullable( + ResultSetMetaData resultSetMetadata, int index, JdbcFieldInfo info) throws SQLException { int nullableValue; if (info != null && info.isNullable() != ResultSetMetaData.columnNullableUnknown) { nullableValue = info.isNullable(); } else { nullableValue = resultSetMetadata.isNullable(index); } - return nullableValue == ResultSetMetaData.columnNullable || - nullableValue == ResultSetMetaData.columnNullableUnknown; + return nullableValue == ResultSetMetaData.columnNullable + || nullableValue == ResultSetMetaData.columnNullableUnknown; } /** * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate * the given Arrow Vector objects. * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param rs ResultSet to use to fetch the data from underlying database + * @param root Arrow {@link VectorSchemaRoot} object to populate * @param config The configuration to use when reading the data. * @throws SQLException on error * @throws JdbcConsumerException on error from VectorConsumer */ - public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) + public static void jdbcToArrowVectors( + ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) throws SQLException, IOException { ResultSetMetaData rsmd = rs.getMetaData(); @@ -405,8 +415,13 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT for (int i = 1; i <= columnCount; i++) { FieldVector vector = root.getVector(rsmd.getColumnLabel(i)); final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); - consumers[i - 1] = getConsumer( - vector.getField().getType(), i, isColumnNullable(rsmd, i, columnFieldInfo), vector, config); + consumers[i - 1] = + getConsumer( + vector.getField().getType(), + i, + isColumnNullable(rsmd, i, columnFieldInfo), + vector, + config); } CompositeJdbcConsumer compositeConsumer = null; @@ -439,18 +454,22 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT } /** - * Default function used for JdbcConsumerFactory. This function gets a JdbcConsumer for the - * given column based on the Arrow type and provided vector. + * Default function used for JdbcConsumerFactory. This function gets a JdbcConsumer for the given + * column based on the Arrow type and provided vector. * - * @param arrowType Arrow type for the column. + * @param arrowType Arrow type for the column. * @param columnIndex Column index to fetch from the ResultSet - * @param nullable Whether the value is nullable or not - * @param vector Vector to store the consumed value - * @param config Associated JdbcToArrowConfig, used mainly for the Calendar. + * @param nullable Whether the value is nullable or not + * @param vector Vector to store the consumed value + * @param config Associated JdbcToArrowConfig, used mainly for the Calendar. 
* @return {@link JdbcConsumer} */ - public static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boolean nullable, - FieldVector vector, JdbcToArrowConfig config) { + public static JdbcConsumer getConsumer( + ArrowType arrowType, + int columnIndex, + boolean nullable, + FieldVector vector, + JdbcToArrowConfig config) { final Calendar calendar = config.getCalendar(); switch (arrowType.getTypeID()) { @@ -472,10 +491,11 @@ public static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boo case Decimal: final RoundingMode bigDecimalRoundingMode = config.getBigDecimalRoundingMode(); if (((ArrowType.Decimal) arrowType).getBitWidth() == 256) { - return Decimal256Consumer.createConsumer((Decimal256Vector) vector, columnIndex, nullable, - bigDecimalRoundingMode); + return Decimal256Consumer.createConsumer( + (Decimal256Vector) vector, columnIndex, nullable, bigDecimalRoundingMode); } else { - return DecimalConsumer.createConsumer((DecimalVector) vector, columnIndex, nullable, bigDecimalRoundingMode); + return DecimalConsumer.createConsumer( + (DecimalVector) vector, columnIndex, nullable, bigDecimalRoundingMode); } case FloatingPoint: switch (((ArrowType.FloatingPoint) arrowType).getPrecision()) { @@ -495,17 +515,25 @@ public static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boo case Date: return DateConsumer.createConsumer((DateDayVector) vector, columnIndex, nullable, calendar); case Time: - return TimeConsumer.createConsumer((TimeMilliVector) vector, columnIndex, nullable, calendar); + return TimeConsumer.createConsumer( + (TimeMilliVector) vector, columnIndex, nullable, calendar); case Timestamp: if (config.getCalendar() == null) { - return TimestampConsumer.createConsumer((TimeStampMilliVector) vector, columnIndex, nullable); + return TimestampConsumer.createConsumer( + (TimeStampMilliVector) vector, columnIndex, nullable); } else { - return TimestampTZConsumer.createConsumer((TimeStampMilliTZVector) vector, columnIndex, nullable, calendar); + return TimestampTZConsumer.createConsumer( + (TimeStampMilliTZVector) vector, columnIndex, nullable, calendar); } case List: FieldVector childVector = ((ListVector) vector).getDataVector(); - JdbcConsumer delegate = getConsumer(childVector.getField().getType(), JDBC_ARRAY_VALUE_COLUMN, - childVector.getField().isNullable(), childVector, config); + JdbcConsumer delegate = + getConsumer( + childVector.getField().getType(), + JDBC_ARRAY_VALUE_COLUMN, + childVector.getField().isNullable(), + childVector, + config); return ArrayConsumer.createConsumer((ListVector) vector, delegate, columnIndex, nullable); case Map: return MapConsumer.createConsumer((MapVector) vector, columnIndex, nullable); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java index f24f409072c0d..d7b62c43acf6f 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import org.apache.arrow.vector.FieldVector; /** * Base class for ColumnBinder implementations. + * * @param The concrete FieldVector subtype. 
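A hedged sketch of calling the reformatted getConsumer directly for a single column; the class name is illustrative, and rs is assumed to already be positioned on a row:

```java
import java.sql.ResultSet;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
import org.apache.arrow.vector.IntVector;

final class ConsumerDispatchSketch {
  // Copies the current row's value from column 1 of rs into the vector.
  static void consumeCurrentRow(ResultSet rs, IntVector vector, JdbcToArrowConfig config)
      throws Exception {
    JdbcConsumer consumer =
        JdbcToArrowUtils.getConsumer(
            vector.getField().getType(), /*columnIndex*/ 1, /*nullable*/ true, vector, config);
    consumer.consume(rs); // dispatches through the type switch shown above
  }
}
```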
*/ public abstract class BaseColumnBinder<V extends FieldVector> implements ColumnBinder { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java index fde4642ef90a5..b9dfcb0d6c956 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.BigIntVector; /** A column binder for 64-bit integers. */ @@ -34,7 +32,8 @@ public BigIntBinder(BigIntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final long value = vector.getDataBuffer().getLong((long) rowIndex * BigIntVector.TYPE_WIDTH); statement.setLong(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java index adae513e99e7c..c9db194f652ff 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.BitVector; /** A column binder for booleans. */ @@ -34,7 +32,8 @@ public BitBinder(BitVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // See BitVector#getBit final int byteIndex = rowIndex >> 3; final byte b = vector.getDataBuffer().getByte(byteIndex); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java index c2b1259e1424b..c38db68234ecf 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.vector.FieldVector; -/** - * A helper to bind values from a wrapped Arrow vector to a JDBC PreparedStatement. - */ +/** A helper to bind values from a wrapped Arrow vector to a JDBC PreparedStatement. */ public interface ColumnBinder { /** * Bind the given row to the given parameter. @@ -43,14 +39,10 @@ public interface ColumnBinder { */ int getJdbcType(); - /** - * Get the vector used by this binder.
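The fixed-width binders above read values straight out of the vector's data buffer instead of going through getObject. A small self-contained illustration of that access pattern; the widening cast mirrors the (long) rowIndex * TYPE_WIDTH arithmetic in the diff, and the class name is illustrative:

```java
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;

final class BufferReadSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator();
        BigIntVector vector = new BigIntVector("v", allocator)) {
      vector.allocateNew(2);
      vector.set(0, 7L);
      vector.set(1, 42L);
      vector.setValueCount(2);
      int rowIndex = 1;
      // Widen before multiplying so the byte offset cannot overflow an int.
      long value = vector.getDataBuffer().getLong((long) rowIndex * BigIntVector.TYPE_WIDTH);
      System.out.println(value); // 42
    }
  }
}
```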
- */ + /** Get the vector used by this binder. */ FieldVector getVector(); - /** - * Create a column binder for a vector, using the default JDBC type code for null values. - */ + /** Create a column binder for a vector, using the default JDBC type code for null values. */ static ColumnBinder forVector(FieldVector vector) { return forVector(vector, /*jdbcType*/ null); } @@ -62,7 +54,8 @@ static ColumnBinder forVector(FieldVector vector) { * @param jdbcType The JDBC type code to use (or null to use the default). */ static ColumnBinder forVector(FieldVector vector, Integer jdbcType) { - final ColumnBinder binder = vector.getField().getType().accept(new ColumnBinderArrowTypeVisitor(vector, jdbcType)); + final ColumnBinder binder = + vector.getField().getType().accept(new ColumnBinderArrowTypeVisitor(vector, jdbcType)); if (vector.getField().isNullable()) { return new NullableColumnBinder(binder); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java index dc708724043d0..30b2305f3f916 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.Types; import java.time.ZoneId; import java.util.Calendar; import java.util.TimeZone; - import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateDayVector; @@ -50,8 +48,8 @@ /** * Visitor to create the base ColumnBinder for a vector. - *
<p>
- * To handle null values, wrap the returned binder in a {@link NullableColumnBinder}. + * + * <p>
To handle null values, wrap the returned binder in a {@link NullableColumnBinder}. */ public class ColumnBinderArrowTypeVisitor implements ArrowType.ArrowTypeVisitor { private final FieldVector vector; @@ -111,17 +109,21 @@ public ColumnBinder visit(ArrowType.Int type) { } switch (type.getBitWidth()) { case 8: - return jdbcType == null ? new TinyIntBinder((TinyIntVector) vector) : - new TinyIntBinder((TinyIntVector) vector, jdbcType); + return jdbcType == null + ? new TinyIntBinder((TinyIntVector) vector) + : new TinyIntBinder((TinyIntVector) vector, jdbcType); case 16: - return jdbcType == null ? new SmallIntBinder((SmallIntVector) vector) : - new SmallIntBinder((SmallIntVector) vector, jdbcType); + return jdbcType == null + ? new SmallIntBinder((SmallIntVector) vector) + : new SmallIntBinder((SmallIntVector) vector, jdbcType); case 32: - return jdbcType == null ? new IntBinder((IntVector) vector) : - new IntBinder((IntVector) vector, jdbcType); + return jdbcType == null + ? new IntBinder((IntVector) vector) + : new IntBinder((IntVector) vector, jdbcType); case 64: - return jdbcType == null ? new BigIntBinder((BigIntVector) vector) : - new BigIntBinder((BigIntVector) vector, jdbcType); + return jdbcType == null + ? new BigIntBinder((BigIntVector) vector) + : new BigIntBinder((BigIntVector) vector, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -131,11 +133,13 @@ public ColumnBinder visit(ArrowType.Int type) { public ColumnBinder visit(ArrowType.FloatingPoint type) { switch (type.getPrecision()) { case SINGLE: - return jdbcType == null ? new Float4Binder((Float4Vector) vector) : - new Float4Binder((Float4Vector) vector, jdbcType); + return jdbcType == null + ? new Float4Binder((Float4Vector) vector) + : new Float4Binder((Float4Vector) vector, jdbcType); case DOUBLE: - return jdbcType == null ? new Float8Binder((Float8Vector) vector) : - new Float8Binder((Float8Vector) vector, jdbcType); + return jdbcType == null + ? new Float8Binder((Float8Vector) vector) + : new Float8Binder((Float8Vector) vector, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -144,51 +148,74 @@ public ColumnBinder visit(ArrowType.FloatingPoint type) { @Override public ColumnBinder visit(ArrowType.Utf8 type) { VarCharVector varChar = (VarCharVector) vector; - return jdbcType == null ? new VarCharBinder<>(varChar, Types.VARCHAR) : - new VarCharBinder<>(varChar, jdbcType); + return jdbcType == null + ? new VarCharBinder<>(varChar, Types.VARCHAR) + : new VarCharBinder<>(varChar, jdbcType); + } + + @Override + public ColumnBinder visit(ArrowType.Utf8View type) { + throw new UnsupportedOperationException( + "Column binder implemented for type " + type + " is not supported"); } @Override public ColumnBinder visit(ArrowType.LargeUtf8 type) { LargeVarCharVector varChar = (LargeVarCharVector) vector; - return jdbcType == null ? new VarCharBinder<>(varChar, Types.LONGVARCHAR) : - new VarCharBinder<>(varChar, jdbcType); + return jdbcType == null + ? new VarCharBinder<>(varChar, Types.LONGVARCHAR) + : new VarCharBinder<>(varChar, jdbcType); } @Override public ColumnBinder visit(ArrowType.Binary type) { VarBinaryVector varBinary = (VarBinaryVector) vector; - return jdbcType == null ? new VarBinaryBinder<>(varBinary, Types.VARBINARY) : - new VarBinaryBinder<>(varBinary, jdbcType); + return jdbcType == null + ? 
new VarBinaryBinder<>(varBinary, Types.VARBINARY) + : new VarBinaryBinder<>(varBinary, jdbcType); + } + + @Override + public ColumnBinder visit(ArrowType.BinaryView type) { + throw new UnsupportedOperationException( + "Column binder implemented for type " + type + " is not supported"); } @Override public ColumnBinder visit(ArrowType.LargeBinary type) { LargeVarBinaryVector varBinary = (LargeVarBinaryVector) vector; - return jdbcType == null ? new VarBinaryBinder<>(varBinary, Types.LONGVARBINARY) : - new VarBinaryBinder<>(varBinary, jdbcType); + return jdbcType == null + ? new VarBinaryBinder<>(varBinary, Types.LONGVARBINARY) + : new VarBinaryBinder<>(varBinary, jdbcType); } @Override public ColumnBinder visit(ArrowType.FixedSizeBinary type) { FixedSizeBinaryVector binary = (FixedSizeBinaryVector) vector; - return jdbcType == null ? new FixedSizeBinaryBinder(binary, Types.BINARY) : - new FixedSizeBinaryBinder(binary, jdbcType); + return jdbcType == null + ? new FixedSizeBinaryBinder(binary, Types.BINARY) + : new FixedSizeBinaryBinder(binary, jdbcType); } @Override public ColumnBinder visit(ArrowType.Bool type) { - return jdbcType == null ? new BitBinder((BitVector) vector) : new BitBinder((BitVector) vector, jdbcType); + return jdbcType == null + ? new BitBinder((BitVector) vector) + : new BitBinder((BitVector) vector, jdbcType); } @Override public ColumnBinder visit(ArrowType.Decimal type) { if (type.getBitWidth() == 128) { DecimalVector decimalVector = (DecimalVector) vector; - return jdbcType == null ? new Decimal128Binder(decimalVector) : new Decimal128Binder(decimalVector, jdbcType); + return jdbcType == null + ? new Decimal128Binder(decimalVector) + : new Decimal128Binder(decimalVector, jdbcType); } else if (type.getBitWidth() == 256) { Decimal256Vector decimalVector = (Decimal256Vector) vector; - return jdbcType == null ? new Decimal256Binder(decimalVector) : new Decimal256Binder(decimalVector, jdbcType); + return jdbcType == null + ? new Decimal256Binder(decimalVector) + : new Decimal256Binder(decimalVector, jdbcType); } throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -197,11 +224,13 @@ public ColumnBinder visit(ArrowType.Decimal type) { public ColumnBinder visit(ArrowType.Date type) { switch (type.getUnit()) { case DAY: - return jdbcType == null ? new DateDayBinder((DateDayVector) vector) : - new DateDayBinder((DateDayVector) vector, /*calendar*/null, jdbcType); + return jdbcType == null + ? new DateDayBinder((DateDayVector) vector) + : new DateDayBinder((DateDayVector) vector, /*calendar*/ null, jdbcType); case MILLISECOND: - return jdbcType == null ? new DateMilliBinder((DateMilliVector) vector) : - new DateMilliBinder((DateMilliVector) vector, /*calendar*/null, jdbcType); + return jdbcType == null + ? new DateMilliBinder((DateMilliVector) vector) + : new DateMilliBinder((DateMilliVector) vector, /*calendar*/ null, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -211,17 +240,21 @@ public ColumnBinder visit(ArrowType.Date type) { public ColumnBinder visit(ArrowType.Time type) { switch (type.getUnit()) { case SECOND: - return jdbcType == null ? new Time32Binder((TimeSecVector) vector) : - new Time32Binder((TimeSecVector) vector, jdbcType); + return jdbcType == null + ? new Time32Binder((TimeSecVector) vector) + : new Time32Binder((TimeSecVector) vector, jdbcType); case MILLISECOND: - return jdbcType == null ? 
new Time32Binder((TimeMilliVector) vector) : - new Time32Binder((TimeMilliVector) vector, jdbcType); + return jdbcType == null + ? new Time32Binder((TimeMilliVector) vector) + : new Time32Binder((TimeMilliVector) vector, jdbcType); case MICROSECOND: - return jdbcType == null ? new Time64Binder((TimeMicroVector) vector) : - new Time64Binder((TimeMicroVector) vector, jdbcType); + return jdbcType == null + ? new Time64Binder((TimeMicroVector) vector) + : new Time64Binder((TimeMicroVector) vector, jdbcType); case NANOSECOND: - return jdbcType == null ? new Time64Binder((TimeNanoVector) vector) : - new Time64Binder((TimeNanoVector) vector, jdbcType); + return jdbcType == null + ? new Time64Binder((TimeNanoVector) vector) + : new Time64Binder((TimeNanoVector) vector, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -246,4 +279,9 @@ public ColumnBinder visit(ArrowType.Interval type) { public ColumnBinder visit(ArrowType.Duration type) { throw new UnsupportedOperationException("No column binder implemented for type " + type); } + + @Override + public ColumnBinder visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("No column binder implemented for type " + type); + } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java index bc16790c8f391..b9eae464c8aa2 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.Date; @@ -22,12 +21,9 @@ import java.sql.SQLException; import java.sql.Types; import java.util.Calendar; - import org.apache.arrow.vector.DateDayVector; -/** - * A column binder for 32-bit dates. - */ +/** A column binder for 32-bit dates. */ public class DateDayBinder extends BaseColumnBinder { private static final long MILLIS_PER_DAY = 86_400_000; private final Calendar calendar; @@ -46,7 +42,8 @@ public DateDayBinder(DateDayVector vector, Calendar calendar, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: multiply with overflow final long index = (long) rowIndex * DateDayVector.TYPE_WIDTH; final Date value = new Date(vector.getDataBuffer().getInt(index) * MILLIS_PER_DAY); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java index 5cb91b46ac179..f320391fbed5b 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
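The visitor hunks above are what ColumnBinder.forVector ultimately drives. A sketch contrasting the default dispatch with an explicit JDBC type override; Types.NVARCHAR is just an arbitrary example override, and the class name is illustrative:

```java
import java.sql.Types;
import org.apache.arrow.adapter.jdbc.binder.ColumnBinder;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VarCharVector;

final class BinderDispatchSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator();
        VarCharVector vector = new VarCharVector("name", allocator)) {
      ColumnBinder byDefault = ColumnBinder.forVector(vector); // Utf8 -> Types.VARCHAR
      ColumnBinder overridden = ColumnBinder.forVector(vector, Types.NVARCHAR);
      System.out.println(byDefault.getJdbcType() + " vs " + overridden.getJdbcType());
    }
  }
}
```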
*/ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.Date; @@ -22,12 +21,9 @@ import java.sql.SQLException; import java.sql.Types; import java.util.Calendar; - import org.apache.arrow.vector.DateMilliVector; -/** - * A column binder for 64-bit dates. - */ +/** A column binder for 64-bit dates. */ public class DateMilliBinder extends BaseColumnBinder { private final Calendar calendar; @@ -39,14 +35,14 @@ public DateMilliBinder(DateMilliVector vector, Calendar calendar) { this(vector, calendar, Types.DATE); } - public DateMilliBinder(DateMilliVector vector, Calendar calendar, int jdbcType) { super(vector, jdbcType); this.calendar = calendar; } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final long index = (long) rowIndex * DateMilliVector.TYPE_WIDTH; final Date value = new Date(vector.getDataBuffer().getLong(index)); if (calendar == null) { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java index 9e9d0e4fdb25b..07ef52f2e594c 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java @@ -14,20 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.math.BigDecimal; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.util.DecimalUtility; -/** - * A binder for 128-bit decimals. - */ +/** A binder for 128-bit decimals. */ public class Decimal128Binder extends BaseColumnBinder { public Decimal128Binder(DecimalVector vector) { this(vector, Types.DECIMAL); @@ -38,9 +34,11 @@ public Decimal128Binder(DecimalVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final BigDecimal value = DecimalUtility.getBigDecimalFromArrowBuf( - vector.getDataBuffer(), rowIndex, vector.getScale(), DecimalVector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final BigDecimal value = + DecimalUtility.getBigDecimalFromArrowBuf( + vector.getDataBuffer(), rowIndex, vector.getScale(), DecimalVector.TYPE_WIDTH); statement.setBigDecimal(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java index bd29e083b4513..5a4222f6b84db 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java @@ -14,20 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.binder; import java.math.BigDecimal; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.Decimal256Vector; import org.apache.arrow.vector.util.DecimalUtility; -/** - * A binder for 256-bit decimals. - */ +/** A binder for 256-bit decimals. */ public class Decimal256Binder extends BaseColumnBinder { public Decimal256Binder(Decimal256Vector vector) { this(vector, Types.DECIMAL); @@ -38,9 +34,11 @@ public Decimal256Binder(Decimal256Vector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final BigDecimal value = DecimalUtility.getBigDecimalFromArrowBuf( - vector.getDataBuffer(), rowIndex, vector.getScale(), Decimal256Vector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final BigDecimal value = + DecimalUtility.getBigDecimalFromArrowBuf( + vector.getDataBuffer(), rowIndex, vector.getScale(), Decimal256Vector.TYPE_WIDTH); statement.setBigDecimal(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java index 7edc5e4532985..4f74b1fa8cfd4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java @@ -14,22 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.vector.FixedSizeBinaryVector; -/** - * A binder for fixed-width binary types. - */ +/** A binder for fixed-width binary types. */ public class FixedSizeBinaryBinder extends BaseColumnBinder { /** * Create a binder for the given vector using the given JDBC type for null values. * - * @param vector The vector to draw values from. + * @param vector The vector to draw values from. * @param jdbcType The JDBC type code. */ public FixedSizeBinaryBinder(FixedSizeBinaryVector vector, int jdbcType) { @@ -37,9 +33,12 @@ public FixedSizeBinaryBinder(FixedSizeBinaryVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { byte[] binaryData = new byte[vector.getByteWidth()]; - vector.getDataBuffer().getBytes((long) rowIndex * binaryData.length, binaryData, 0, binaryData.length); + vector + .getDataBuffer() + .getBytes((long) rowIndex * binaryData.length, binaryData, 0, binaryData.length); statement.setBytes(parameterIndex, binaryData); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java index a471c1ebadd66..466a67a2dbc89 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java @@ -14,18 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
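Both decimal binders delegate to DecimalUtility.getBigDecimalFromArrowBuf with their vector's TYPE_WIDTH. A minimal round trip showing that call against a 128-bit DecimalVector; the class name is illustrative:

```java
import java.math.BigDecimal;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.util.DecimalUtility;

final class DecimalReadSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator();
        DecimalVector vector =
            new DecimalVector("amount", allocator, /*precision*/ 10, /*scale*/ 2)) {
      vector.allocateNew(1);
      vector.setSafe(0, new BigDecimal("12.34")); // scale must match the vector's scale
      vector.setValueCount(1);
      // The same call Decimal128Binder makes to materialize the value.
      BigDecimal value =
          DecimalUtility.getBigDecimalFromArrowBuf(
              vector.getDataBuffer(), /*index*/ 0, vector.getScale(), DecimalVector.TYPE_WIDTH);
      System.out.println(value); // 12.34
    }
  }
}
```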
*/ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.Float4Vector; -/** - * A binder for 32-bit floats. - */ +/** A binder for 32-bit floats. */ public class Float4Binder extends BaseColumnBinder { public Float4Binder(Float4Vector vector) { this(vector, Types.REAL); @@ -36,7 +32,8 @@ public Float4Binder(Float4Vector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final float value = vector.getDataBuffer().getFloat((long) rowIndex * Float4Vector.TYPE_WIDTH); statement.setFloat(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java index 4710c3b59860d..222bebf115372 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java @@ -14,18 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.Float8Vector; -/** - * A binder for 64-bit floats. - */ +/** A binder for 64-bit floats. */ public class Float8Binder extends BaseColumnBinder { public Float8Binder(Float8Vector vector) { this(vector, Types.DOUBLE); @@ -36,8 +32,10 @@ public Float8Binder(Float8Vector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final double value = vector.getDataBuffer().getDouble((long) rowIndex * Float8Vector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final double value = + vector.getDataBuffer().getDouble((long) rowIndex * Float8Vector.TYPE_WIDTH); statement.setDouble(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java index 7d47f585a39d9..6b49eeb5352b1 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.IntVector; /** A column binder for 32-bit integers. 
*/ @@ -34,7 +32,8 @@ public IntBinder(IntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final int value = vector.getDataBuffer().getInt((long) rowIndex * IntVector.TYPE_WIDTH); statement.setInt(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java index b8aa61234f4e9..25172c0c1f0aa 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java @@ -14,21 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.lang.reflect.Array; import java.util.ArrayList; import java.util.Arrays; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.util.Text; -/** - * A column binder for list of primitive values. - */ +/** A column binder for list of primitive values. */ public class ListBinder extends BaseColumnBinder { private final UnionListReader listReader; @@ -52,7 +48,9 @@ public ListBinder(ListVector vector, int jdbcType) { try { arrayElementClass = dataVectorClass.getMethod("getObject", Integer.TYPE).getReturnType(); } catch (NoSuchMethodException e) { - final String message = String.format("Issue to determine type for getObject method of data vector class %s ", + final String message = + String.format( + "Issue to determine type for getObject method of data vector class %s ", dataVectorClass.getName()); throw new RuntimeException(message); } @@ -60,7 +58,8 @@ public ListBinder(ListVector vector, int jdbcType) { } @Override - public void bind(java.sql.PreparedStatement statement, int parameterIndex, int rowIndex)throws java.sql.SQLException { + public void bind(java.sql.PreparedStatement statement, int parameterIndex, int rowIndex) + throws java.sql.SQLException { listReader.setPosition(rowIndex); ArrayList sourceArray = (ArrayList) listReader.readObject(); Object array; @@ -69,7 +68,9 @@ public void bind(java.sql.PreparedStatement statement, int parameterIndex, int r Arrays.setAll((Object[]) array, sourceArray::get); } else { array = new String[sourceArray.size()]; - Arrays.setAll((Object[]) array, idx -> sourceArray.get(idx) != null ? sourceArray.get(idx).toString() : null); + Arrays.setAll( + (Object[]) array, + idx -> sourceArray.get(idx) != null ? sourceArray.get(idx).toString() : null); } statement.setObject(parameterIndex, array); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java index 07391eb7cbfb4..e94f186453581 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
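ListBinder, reformatted above, reads a row through UnionListReader and hands a plain Java array to PreparedStatement.setObject. A construction sketch using the (ListVector, int) constructor visible in this diff; java.sql.Types.ARRAY is assumed as a sensible type code, and the class name is illustrative:

```java
import java.sql.Types;
import org.apache.arrow.adapter.jdbc.binder.ListBinder;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;

final class ListBindSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator();
        ListVector vector = ListVector.empty("xs", allocator)) {
      UnionListWriter writer = vector.getWriter();
      writer.startList();
      writer.writeInt(1);
      writer.writeInt(2);
      writer.endList();
      vector.setValueCount(1);
      ListBinder binder = new ListBinder(vector, Types.ARRAY);
      // binder.bind(stmt, 1, 0) would call stmt.setObject(1, new Integer[] {1, 2}).
      System.out.println(binder.getJdbcType()); // 2003 (Types.ARRAY)
    }
  }
}
```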
*/ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; @@ -23,16 +22,13 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Objects; - import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.JsonStringHashMap; -/** - * A column binder for map of primitive values. - */ +/** A column binder for map of primitive values. */ public class MapBinder extends BaseColumnBinder { private UnionMapReader reader; @@ -58,8 +54,8 @@ public MapBinder(MapVector vector, int jdbcType) { } List keyValueFields = Objects.requireNonNull(structField.get(0)).getChildren(); if (keyValueFields.size() != 2) { - throw new IllegalArgumentException("Expected two children fields " + - "inside nested Struct field in Map"); + throw new IllegalArgumentException( + "Expected two children fields " + "inside nested Struct field in Map"); } ArrowType keyType = Objects.requireNonNull(keyValueFields.get(0)).getType(); ArrowType valueType = Objects.requireNonNull(keyValueFields.get(1)).getType(); @@ -68,15 +64,16 @@ public MapBinder(MapVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, - int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { reader.setPosition(rowIndex); LinkedHashMap tags = new JsonStringHashMap<>(); while (reader.next()) { Object key = reader.key().readObject(); Object value = reader.value().readObject(); - tags.put(isTextKey && key != null ? key.toString() : key, - isTextValue && value != null ? value.toString() : value); + tags.put( + isTextKey && key != null ? key.toString() : key, + isTextValue && value != null ? value.toString() : value); } switch (jdbcType) { case Types.VARCHAR: diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java index 123b587ca50d4..bf5288b173341 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.vector.FieldVector; -/** - * A ColumnBinder that checks for nullability before deferring to a type-specific binder. - */ +/** A ColumnBinder that checks for nullability before deferring to a type-specific binder. 
*/ public class NullableColumnBinder implements ColumnBinder { private final ColumnBinder wrapped; @@ -33,7 +29,8 @@ public NullableColumnBinder(ColumnBinder wrapped) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { if (wrapped.getVector().isNull(rowIndex)) { statement.setNull(parameterIndex, wrapped.getJdbcType()); } else { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java index f9d744b9f5497..aa636c9336f55 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.SmallIntVector; /** A column binder for 16-bit integers. */ @@ -34,8 +32,10 @@ public SmallIntBinder(SmallIntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final short value = vector.getDataBuffer().getShort((short) rowIndex * SmallIntVector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final short value = + vector.getDataBuffer().getShort((long) rowIndex * SmallIntVector.TYPE_WIDTH); statement.setShort(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java index 5dc7e3f513f97..4e09c3be23264 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java @@ -14,21 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Time; import java.sql.Types; - import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.TimeMilliVector; import org.apache.arrow.vector.TimeSecVector; -/** - * A binder for 32-bit time types. - */ +/** A binder for 32-bit time types.
*/ public class Time32Binder extends BaseColumnBinder { private static final long TYPE_WIDTH = 4; @@ -43,11 +39,11 @@ public Time32Binder(TimeMilliVector vector) { } public Time32Binder(TimeSecVector vector, int jdbcType) { - this(vector, /*factor*/1_000, jdbcType); + this(vector, /*factor*/ 1_000, jdbcType); } public Time32Binder(TimeMilliVector vector, int jdbcType) { - this(vector, /*factor*/1, jdbcType); + this(vector, /*factor*/ 1, jdbcType); } Time32Binder(BaseFixedWidthVector vector, long factor, int jdbcType) { @@ -56,7 +52,8 @@ public Time32Binder(TimeMilliVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: multiply with overflow // TODO: take in a Calendar as well? final Time value = new Time(vector.getDataBuffer().getInt(rowIndex * TYPE_WIDTH) * factor); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java index 8d62ae0eb36df..01c85fb32f1b5 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java @@ -14,21 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Time; import java.sql.Types; - import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.TimeMicroVector; import org.apache.arrow.vector.TimeNanoVector; -/** - * A binder for 64-bit time types. - */ +/** A binder for 64-bit time types. */ public class Time64Binder extends BaseColumnBinder { private static final long TYPE_WIDTH = 8; @@ -43,11 +39,11 @@ public Time64Binder(TimeNanoVector vector) { } public Time64Binder(TimeMicroVector vector, int jdbcType) { - this(vector, /*factor*/1_000, jdbcType); + this(vector, /*factor*/ 1_000, jdbcType); } public Time64Binder(TimeNanoVector vector, int jdbcType) { - this(vector, /*factor*/1_000_000, jdbcType); + this(vector, /*factor*/ 1_000_000, jdbcType); } Time64Binder(BaseFixedWidthVector vector, long factor, int jdbcType) { @@ -56,7 +52,8 @@ public Time64Binder(TimeNanoVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: option to throw on truncation (vendor Guava IntMath#multiply) final Time value = new Time(vector.getDataBuffer().getLong(rowIndex * TYPE_WIDTH) / factor); statement.setTime(parameterIndex, value); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java index 6677e5909901a..942d7ae58dcd5 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
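The Time32Binder and Time64Binder constructors above differ only in the scale factor applied before building java.sql.Time: 32-bit second-resolution values are multiplied up to milliseconds, while 64-bit micro- and nanosecond values are divided down. The arithmetic in isolation, with an illustrative class name:

```java
import java.sql.Time;

final class TimeScaleSketch {
  public static void main(String[] args) {
    int seconds = 12 * 3600 + 34 * 60 + 56;        // payload of a TimeSecVector slot
    long micros = seconds * 1_000_000L;            // payload of a TimeMicroVector slot
    Time fromSeconds = new Time(seconds * 1_000L); // factor 1_000, as in Time32Binder
    Time fromMicros = new Time(micros / 1_000);    // factor 1_000, as in Time64Binder
    System.out.println(fromSeconds.equals(fromMicros)); // true
  }
}
```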
*/ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; @@ -22,7 +21,6 @@ import java.sql.Timestamp; import java.sql.Types; import java.util.Calendar; - import org.apache.arrow.vector.TimeStampVector; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -32,15 +30,17 @@ public class TimeStampBinder extends BaseColumnBinder { private final long unitsPerSecond; private final long nanosPerUnit; - /** - * Create a binder for a timestamp vector using the default JDBC type code. - */ + /** Create a binder for a timestamp vector using the default JDBC type code. */ public TimeStampBinder(TimeStampVector vector, Calendar calendar) { - this(vector, calendar, isZoned(vector.getField().getType()) ? Types.TIMESTAMP_WITH_TIMEZONE : Types.TIMESTAMP); + this( + vector, + calendar, + isZoned(vector.getField().getType()) ? Types.TIMESTAMP_WITH_TIMEZONE : Types.TIMESTAMP); } /** * Create a binder for a timestamp vector. + * * @param vector The vector to pull values from. * @param calendar Optionally, the calendar to pass to JDBC. * @param jdbcType The JDBC type code to use for null values. @@ -73,19 +73,23 @@ public TimeStampBinder(TimeStampVector vector, Calendar calendar, int jdbcType) } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: option to throw on truncation (vendor Guava IntMath#multiply) or overflow - final long rawValue = vector.getDataBuffer().getLong((long) rowIndex * TimeStampVector.TYPE_WIDTH); + final long rawValue = + vector.getDataBuffer().getLong((long) rowIndex * TimeStampVector.TYPE_WIDTH); final long seconds = rawValue / unitsPerSecond; final int nanos = (int) ((rawValue - (seconds * unitsPerSecond)) * nanosPerUnit); final Timestamp value = new Timestamp(seconds * 1_000); value.setNanos(nanos); if (calendar != null) { - // Timestamp == Date == UTC timestamp (confusingly). Arrow's timestamp with timezone is a UTC value with a + // Timestamp == Date == UTC timestamp (confusingly). Arrow's timestamp with timezone is a UTC + // value with a // zone offset, so we don't need to do any conversion. statement.setTimestamp(parameterIndex, value, calendar); } else { - // Arrow timestamp without timezone isn't strictly convertible to any timezone. So this is technically wrong, + // Arrow timestamp without timezone isn't strictly convertible to any timezone. So this is + // technically wrong, // but there is no 'correct' interpretation here. The application should provide a calendar. statement.setTimestamp(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java index f51d139be863a..0580456d37983 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.TinyIntVector; /** A column binder for 8-bit integers. 
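TimeStampBinder's bind, shown above, splits the raw 64-bit value into whole seconds plus a nanosecond remainder using unitsPerSecond and nanosPerUnit. The same decomposition for a hypothetical microsecond-unit payload, with an illustrative class name:

```java
import java.sql.Timestamp;

final class TimestampSplitSketch {
  public static void main(String[] args) {
    long rawValue = 1_700_000_000_123_456L; // hypothetical MICROSECOND-unit payload
    long unitsPerSecond = 1_000_000L;
    long nanosPerUnit = 1_000L;
    long seconds = rawValue / unitsPerSecond;
    int nanos = (int) ((rawValue - seconds * unitsPerSecond) * nanosPerUnit);
    Timestamp value = new Timestamp(seconds * 1_000);
    value.setNanos(nanos);
    System.out.println(value.toInstant()); // 2023-11-14T22:13:20.123456Z
  }
}
```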
*/ @@ -34,7 +32,8 @@ public TinyIntBinder(TinyIntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final byte value = vector.getDataBuffer().getByte((long) rowIndex * TinyIntVector.TYPE_WIDTH); statement.setByte(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java index a94cff6a00496..41807efc611b1 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java @@ -14,12 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.vector.ElementAddressableVector; import org.apache.arrow.vector.FieldVector; @@ -29,13 +27,14 @@ * * @param The binary vector. */ -public class VarBinaryBinder extends BaseColumnBinder { +public class VarBinaryBinder + extends BaseColumnBinder { private final ArrowBufPointer element; /** * Create a binder for the given vector using the given JDBC type for null values. * - * @param vector The vector to draw values from. + * @param vector The vector to draw values from. * @param jdbcType The JDBC type code. */ public VarBinaryBinder(T vector, int jdbcType) { @@ -44,15 +43,18 @@ public VarBinaryBinder(T vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { vector.getDataPointer(rowIndex, element); if (element.getBuf() == null) { statement.setNull(parameterIndex, jdbcType); return; } if (element.getLength() > (long) Integer.MAX_VALUE) { - final String message = String.format("Length of value at index %d (%d) exceeds Integer.MAX_VALUE", - rowIndex, element.getLength()); + final String message = + String.format( + "Length of value at index %d (%d) exceeds Integer.MAX_VALUE", + rowIndex, element.getLength()); throw new RuntimeException(message); } byte[] binaryData = new byte[(int) element.getLength()]; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java index 73bd55981490b..926e1da28c9a0 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.nio.charset.StandardCharsets; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VariableWidthVector; @@ -30,13 +28,14 @@ * * @param The text vector. 
*/ -public class VarCharBinder extends BaseColumnBinder { +public class VarCharBinder + extends BaseColumnBinder { private final ArrowBufPointer element; /** * Create a binder for the given vector using the given JDBC type for null values. * - * @param vector The vector to draw values from. + * @param vector The vector to draw values from. * @param jdbcType The JDBC type code. */ public VarCharBinder(T vector, int jdbcType) { @@ -45,15 +44,18 @@ public VarCharBinder(T vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { vector.getDataPointer(rowIndex, element); if (element.getBuf() == null) { statement.setNull(parameterIndex, jdbcType); return; } if (element.getLength() > (long) Integer.MAX_VALUE) { - final String message = String.format("Length of value at index %d (%d) exceeds Integer.MAX_VALUE", - rowIndex, element.getLength()); + final String message = + String.format( + "Length of value at index %d (%d) exceeds Integer.MAX_VALUE", + rowIndex, element.getLength()); throw new RuntimeException(message); } byte[] utf8Bytes = new byte[(int) element.getLength()]; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java index 4f8936e0c27bf..945c3c9f84fa8 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java @@ -15,8 +15,5 @@ * limitations under the License. */ -/** - * Utilities to bind Arrow data as JDBC prepared statement parameters. - */ - +/** Utilities to bind Arrow data as JDBC prepared statement parameters. */ package org.apache.arrow.adapter.jdbc.binder; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java index 2f18b8a416d34..4676e8204eed4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java @@ -14,29 +14,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.Array; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.ListVector; /** - * Consumer which consume array type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.complex.ListVector}. + * Consumer which consume array type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.complex.ListVector}. */ public abstract class ArrayConsumer extends BaseConsumer { - /** - * Creates a consumer for {@link ListVector}. - */ + /** Creates a consumer for {@link ListVector}. 
*/ public static ArrayConsumer createConsumer( - ListVector vector, JdbcConsumer delegate, int index, boolean nullable) { + ListVector vector, JdbcConsumer delegate, int index, boolean nullable) { if (nullable) { return new ArrayConsumer.NullableArrayConsumer(vector, delegate, index); } else { @@ -50,9 +46,7 @@ public static ArrayConsumer createConsumer( protected int innerVectorIndex = 0; - /** - * Instantiate a ArrayConsumer. - */ + /** Instantiate an ArrayConsumer. */ public ArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { super(vector, index); this.delegate = delegate; @@ -81,14 +75,10 @@ void ensureInnerVectorCapacity(int targetCapacity) { } } - /** - * Nullable consumer for {@link ListVector}. - */ + /** Nullable consumer for {@link ListVector}. */ static class NullableArrayConsumer extends ArrayConsumer { - /** - * Instantiate a nullable array consumer. - */ + /** Instantiate a nullable array consumer. */ public NullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { super(vector, delegate, index); } @@ -113,14 +103,10 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { } } - /** - * Non-nullable consumer for {@link ListVector}. - */ + /** Non-nullable consumer for {@link ListVector}. */ static class NonNullableArrayConsumer extends ArrayConsumer { - /** - * Instantiate a nullable array consumer. - */ + /** Instantiate a non-nullable array consumer. */ public NonNullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { super(vector, delegate, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java index 2db128d3e2b2d..9ca3c98a7eb98 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import org.apache.arrow.vector.ValueVector; /** * Base class for all consumers. + * * @param <V> vector type. */ public abstract class BaseConsumer<V extends ValueVector> implements JdbcConsumer<V> { @@ -33,6 +33,7 @@ public abstract class BaseConsumer<V extends ValueVector> implements JdbcConsumer<V> /** * Constructs a new consumer. + * * @param vector the underlying vector for the consumer. * @param index the column id for the consumer. */ diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java index 19c8efa91719f..b7c547a9391b6 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.BigIntVector; /** - * Consumer which consume bigint type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.BigIntVector}.
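The consumer classes reformatted here all follow the same drain pattern: create a consumer bound to a 1-based column index, then feed it one row at a time. A sketch using the BigIntConsumer factory whose signature appears just below; the class name is illustrative:

```java
import java.sql.ResultSet;
import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer;
import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
import org.apache.arrow.vector.BigIntVector;

final class DrainColumnSketch {
  // Copies every value of BIGINT column 1 into the vector; the caller owns both.
  static int drain(ResultSet rs, BigIntVector vector) throws Exception {
    JdbcConsumer consumer = BigIntConsumer.createConsumer(vector, /*index*/ 1, /*nullable*/ true);
    int rows = 0;
    while (rs.next()) {
      consumer.consume(rs); // one call per row; the consumer tracks its own write index
      rows++;
    }
    vector.setValueCount(rows);
    return rows;
  }
}
```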
*/ public class BigIntConsumer { - /** - * Creates a consumer for {@link BigIntVector}. - */ - public static JdbcConsumer createConsumer(BigIntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link BigIntVector}. */ + public static JdbcConsumer createConsumer( + BigIntVector vector, int index, boolean nullable) { if (nullable) { return new NullableBigIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(BigIntVector vector, int } } - /** - * Nullable consumer for big int. - */ + /** Nullable consumer for big int. */ static class NullableBigIntConsumer extends BaseConsumer { - /** - * Instantiate a BigIntConsumer. - */ + /** Instantiate a BigIntConsumer. */ public NullableBigIntConsumer(BigIntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for big int. - */ + /** Non-nullable consumer for big int. */ static class NonNullableBigIntConsumer extends BaseConsumer { - /** - * Instantiate a BigIntConsumer. - */ + /** Instantiate a BigIntConsumer. */ public NonNullableBigIntConsumer(BigIntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java index 8c5f61169d405..edbc6360df6bf 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.io.InputStream; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.VarBinaryVector; /** - * Consumer which consume binary type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.VarBinaryVector}. + * Consumer which consume binary type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.VarBinaryVector}. */ public abstract class BinaryConsumer extends BaseConsumer { - /** - * Creates a consumer for {@link VarBinaryVector}. - */ + /** Creates a consumer for {@link VarBinaryVector}. */ public static BinaryConsumer createConsumer(VarBinaryVector vector, int index, boolean nullable) { if (nullable) { return new NullableBinaryConsumer(vector, index); @@ -45,9 +41,7 @@ public static BinaryConsumer createConsumer(VarBinaryVector vector, int index, b private final byte[] reuseBytes = new byte[1024]; - /** - * Instantiate a BinaryConsumer. - */ + /** Instantiate a BinaryConsumer. */ public BinaryConsumer(VarBinaryVector vector, int index) { super(vector, index); if (vector != null) { @@ -55,9 +49,7 @@ public BinaryConsumer(VarBinaryVector vector, int index) { } } - /** - * consume a InputStream. - */ + /** consume a InputStream. 
*/ public void consume(InputStream is) throws IOException { if (is != null) { while (currentIndex >= vector.getValueCapacity()) { @@ -74,7 +66,8 @@ public void consume(InputStream is) throws IOException { vector.getDataBuffer().setBytes(startOffset + dataLength, reuseBytes, 0, read); dataLength += read; } - offsetBuffer.setInt((currentIndex + 1) * VarBinaryVector.OFFSET_WIDTH, startOffset + dataLength); + offsetBuffer.setInt( + (currentIndex + 1) * ((long) VarBinaryVector.OFFSET_WIDTH), startOffset + dataLength); BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); vector.setLastSet(currentIndex); } @@ -91,14 +84,10 @@ public void resetValueVector(VarBinaryVector vector) { this.currentIndex = 0; } - /** - * Consumer for nullable binary data. - */ + /** Consumer for nullable binary data. */ static class NullableBinaryConsumer extends BinaryConsumer { - - /** - * Instantiate a BinaryConsumer. - */ + + /** Instantiate a BinaryConsumer. */ public NullableBinaryConsumer(VarBinaryVector vector, int index) { super(vector, index); } @@ -113,14 +102,10 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { } } - /** - * Consumer for non-nullable binary data. - */ + /** Consumer for non-nullable binary data. */ static class NonNullableBinaryConsumer extends BinaryConsumer { - /** - * Instantiate a BinaryConsumer. - */ + /** Instantiate a BinaryConsumer. */ public NonNullableBinaryConsumer(VarBinaryVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java index d2d94d0a40e2f..287b9509b5054 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.BitVector; /** - * Consumer which consume bit type values from {@link ResultSet}. - * Write the data to {@link BitVector}. + * Consumer which consume bit type values from {@link ResultSet}. Write the data to {@link + * BitVector}. */ public class BitConsumer { - /** - * Creates a consumer for {@link BitVector}. - */ - public static JdbcConsumer createConsumer(BitVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link BitVector}. */ + public static JdbcConsumer createConsumer( + BitVector vector, int index, boolean nullable) { if (nullable) { return new NullableBitConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(BitVector vector, int index } } - /** - * Nullable consumer for {@link BitVector}. - */ + /** Nullable consumer for {@link BitVector}. */ static class NullableBitConsumer extends BaseConsumer { - /** - * Instantiate a BitConsumer. - */ + /** Instantiate a BitConsumer. */ public NullableBitConsumer(BitVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for {@link BitVector}. - */ + /** Non-nullable consumer for {@link BitVector}. */ static class NonNullableBitConsumer extends BaseConsumer { - /** - * Instantiate a BitConsumer. 
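The BinaryConsumer hunk above is more than comment reflow: the offset write now computes its byte index as (currentIndex + 1) * ((long) VarBinaryVector.OFFSET_WIDTH), so the multiplication happens in 64-bit arithmetic before reaching ArrowBuf.setInt and can no longer wrap for very large batches. A self-contained illustration, with the constant 4 standing in for OFFSET_WIDTH:

    public class OffsetOverflowSketch {
      public static void main(String[] args) {
        int currentIndex = 600_000_000; // an extreme row index within one batch
        int wrapped = (currentIndex + 1) * 4;   // 32-bit product overflows
        long correct = (currentIndex + 1) * 4L; // 64-bit product, as patched
        System.out.println(wrapped); // -1894967292
        System.out.println(correct); // 2400000004
      }
    }

The same fix recurs below in ClobConsumer, where the hard-coded offset width 4 gains an L suffix.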
- */ + /** Instantiate a BitConsumer. */ public NonNullableBitConsumer(BitVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java index e57ecdf91707a..a4fc789494e0f 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java @@ -14,19 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.Blob; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.VarBinaryVector; /** - * Consumer which consume blob type values from {@link ResultSet}. - * Write the data to {@link VarBinaryVector}. + * Consumer which consume blob type values from {@link ResultSet}. Write the data to {@link + * VarBinaryVector}. */ public class BlobConsumer extends BaseConsumer { @@ -34,17 +32,12 @@ public class BlobConsumer extends BaseConsumer { private final boolean nullable; - /** - * Creates a consumer for {@link VarBinaryVector}. - */ - public static BlobConsumer createConsumer( - BinaryConsumer delegate, int index, boolean nullable) { + /** Creates a consumer for {@link VarBinaryVector}. */ + public static BlobConsumer createConsumer(BinaryConsumer delegate, int index, boolean nullable) { return new BlobConsumer(delegate, index, nullable); } - /** - * Instantiate a BlobConsumer. - */ + /** Instantiate a BlobConsumer. */ public BlobConsumer(BinaryConsumer delegate, int index, boolean nullable) { super(null, index); this.delegate = delegate; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java index a52d9b73b4db0..7deba1cbffebd 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java @@ -14,28 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.nio.charset.StandardCharsets; import java.sql.Clob; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.util.MemoryUtil; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.VarCharVector; /** - * Consumer which consume clob type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.VarCharVector}. + * Consumer which consume clob type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.VarCharVector}. */ public abstract class ClobConsumer extends BaseConsumer { - /** - * Creates a consumer for {@link VarCharVector}. - */ + /** Creates a consumer for {@link VarCharVector}. */ public static ClobConsumer createConsumer(VarCharVector vector, int index, boolean nullable) { if (nullable) { return new NullableClobConsumer(vector, index); @@ -46,9 +42,7 @@ public static ClobConsumer createConsumer(VarCharVector vector, int index, boole private static final int BUFFER_SIZE = 256; - /** - * Instantiate a ClobConsumer. 
- */ + /** Instantiate a ClobConsumer. */ public ClobConsumer(VarCharVector vector, int index) { super(vector, index); if (vector != null) { @@ -63,14 +57,10 @@ public void resetValueVector(VarCharVector vector) { this.currentIndex = 0; } - /** - * Nullable consumer for clob data. - */ + /** Nullable consumer for clob data. */ static class NullableClobConsumer extends ClobConsumer { - - /** - * Instantiate a ClobConsumer. - */ + + /** Instantiate a ClobConsumer. */ public NullableClobConsumer(VarCharVector vector, int index) { super(vector, index); } @@ -88,7 +78,7 @@ public void consume(ResultSet resultSet) throws SQLException { ArrowBuf dataBuffer = vector.getDataBuffer(); ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - int startIndex = offsetBuffer.getInt(currentIndex * 4); + int startIndex = offsetBuffer.getInt(currentIndex * 4L); while (read <= length) { String str = clob.getSubString(read, readSize); byte[] bytes = str.getBytes(StandardCharsets.UTF_8); @@ -97,16 +87,16 @@ public void consume(ResultSet resultSet) throws SQLException { vector.reallocDataBuffer(); } MemoryUtil.UNSAFE.copyMemory( - bytes, - MemoryUtil.BYTE_ARRAY_BASE_OFFSET, - null, - dataBuffer.memoryAddress() + startIndex + totalBytes, - bytes.length); + bytes, + MemoryUtil.BYTE_ARRAY_BASE_OFFSET, + null, + dataBuffer.memoryAddress() + startIndex + totalBytes, + bytes.length); totalBytes += bytes.length; read += readSize; } - offsetBuffer.setInt((currentIndex + 1) * 4, startIndex + totalBytes); + offsetBuffer.setInt((currentIndex + 1) * 4L, startIndex + totalBytes); BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); vector.setLastSet(currentIndex); } @@ -115,14 +105,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for clob data. - */ + /** Non-nullable consumer for clob data. */ static class NonNullableClobConsumer extends ClobConsumer { - /** - * Instantiate a ClobConsumer. - */ + /** Instantiate a ClobConsumer. 
*/ public NonNullableClobConsumer(VarCharVector vector, int index) { super(vector, index); } @@ -139,7 +125,7 @@ public void consume(ResultSet resultSet) throws SQLException { ArrowBuf dataBuffer = vector.getDataBuffer(); ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - int startIndex = offsetBuffer.getInt(currentIndex * 4); + int startIndex = offsetBuffer.getInt(currentIndex * 4L); while (read <= length) { String str = clob.getSubString(read, readSize); byte[] bytes = str.getBytes(StandardCharsets.UTF_8); @@ -148,20 +134,20 @@ public void consume(ResultSet resultSet) throws SQLException { vector.reallocDataBuffer(); } MemoryUtil.UNSAFE.copyMemory( - bytes, - MemoryUtil.BYTE_ARRAY_BASE_OFFSET, - null, - dataBuffer.memoryAddress() + startIndex + totalBytes, - bytes.length); + bytes, + MemoryUtil.BYTE_ARRAY_BASE_OFFSET, + null, + dataBuffer.memoryAddress() + startIndex + totalBytes, + bytes.length); totalBytes += bytes.length; read += readSize; } - offsetBuffer.setInt((currentIndex + 1) * 4, startIndex + totalBytes); + offsetBuffer.setInt((currentIndex + 1) * 4L, startIndex + totalBytes); BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); vector.setLastSet(currentIndex); } - + currentIndex++; } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java index e6d780956d538..2366116fd0d18 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException; import org.apache.arrow.util.AutoCloseables; @@ -28,17 +26,12 @@ import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.ArrowType; -/** - * Composite consumer which hold all consumers. - * It manages the consume and cleanup process. - */ +/** Composite consumer which hold all consumers. It manages the consume and cleanup process. */ public class CompositeJdbcConsumer implements JdbcConsumer { private final JdbcConsumer[] consumers; - /** - * Construct an instance. - */ + /** Construct an instance. 
*/ public CompositeJdbcConsumer(JdbcConsumer[] consumers) { this.consumers = consumers; } @@ -51,9 +44,11 @@ public void consume(ResultSet rs) throws SQLException, IOException { } catch (Exception e) { if (consumers[i] instanceof BaseConsumer) { BaseConsumer consumer = (BaseConsumer) consumers[i]; - JdbcFieldInfo fieldInfo = new JdbcFieldInfo(rs.getMetaData(), consumer.columnIndexInResultSet); + JdbcFieldInfo fieldInfo = + new JdbcFieldInfo(rs.getMetaData(), consumer.columnIndexInResultSet); ArrowType arrowType = consumer.vector.getMinorType().getType(); - throw new JdbcConsumerException("Exception while consuming JDBC value", e, fieldInfo, arrowType); + throw new JdbcConsumerException( + "Exception while consuming JDBC value", e, fieldInfo, arrowType); } else { throw e; } @@ -70,17 +65,12 @@ public void close() { } catch (Exception e) { throw new RuntimeException("Error occurred while releasing resources.", e); } - } @Override - public void resetValueVector(ValueVector vector) { + public void resetValueVector(ValueVector vector) {} - } - - /** - * Reset inner consumers through vectors in the vector schema root. - */ + /** Reset inner consumers through vectors in the vector schema root. */ public void resetVectorSchemaRoot(VectorSchemaRoot root) { assert root.getFieldVectors().size() == consumers.length; for (int i = 0; i < consumers.length; i++) { @@ -88,4 +78,3 @@ public void resetVectorSchemaRoot(VectorSchemaRoot root) { } } } - diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java index b9b83daccc25a..c271b900682a1 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.Date; @@ -22,19 +21,16 @@ import java.sql.SQLException; import java.util.Calendar; import java.util.concurrent.TimeUnit; - import org.apache.arrow.vector.DateDayVector; import org.apache.arrow.vector.DateMilliVector; /** - * Consumer which consume date type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.DateDayVector}. + * Consumer which consume date type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.DateDayVector}. */ public class DateConsumer { - /** - * Creates a consumer for {@link DateMilliVector}. - */ + /** Creates a consumer for {@link DateMilliVector}. */ public static JdbcConsumer createConsumer( DateDayVector vector, int index, boolean nullable, Calendar calendar) { if (nullable) { @@ -44,23 +40,17 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for date. - */ + /** Nullable consumer for date. */ static class NullableDateConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. */ public NullableDateConsumer(DateDayVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. 
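CompositeJdbcConsumer's consume loop above now wraps any per-column failure in a JdbcConsumerException carrying the column's JdbcFieldInfo and the target vector's ArrowType. A hedged sketch of a caller surfacing that context; the drain helper is illustrative, not part of the adapter:

    import java.io.IOException;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer;
    import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException;

    public final class ConsumeWithDiagnostics {
      private ConsumeWithDiagnostics() {}

      /** Drains a ResultSet row by row, rethrowing with column diagnostics intact. */
      public static void drain(CompositeJdbcConsumer consumer, ResultSet rs)
          throws SQLException, IOException {
        while (rs.next()) {
          try {
            consumer.consume(rs);
          } catch (JdbcConsumerException e) {
            // The exception already records which JDBC field and Arrow type failed;
            // log and rethrow rather than losing the cause.
            System.err.println("Column conversion failed: " + e.getMessage());
            throw e;
          }
        }
      }
    }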
*/ public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -68,8 +58,10 @@ public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) @Override public void consume(ResultSet resultSet) throws SQLException { - Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) : - resultSet.getDate(columnIndexInResultSet, calendar); + Date date = + calendar == null + ? resultSet.getDate(columnIndexInResultSet) + : resultSet.getDate(columnIndexInResultSet, calendar); if (!resultSet.wasNull()) { // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. @@ -79,23 +71,17 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for date. - */ + /** Non-nullable consumer for date. */ static class NonNullableDateConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. */ public NonNullableDateConsumer(DateDayVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. */ public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -103,8 +89,10 @@ public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calenda @Override public void consume(ResultSet resultSet) throws SQLException { - Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) : - resultSet.getDate(columnIndexInResultSet, calendar); + Date date = + calendar == null + ? resultSet.getDate(columnIndexInResultSet) + : resultSet.getDate(columnIndexInResultSet, calendar); // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime()))); @@ -112,5 +100,3 @@ public void consume(ResultSet resultSet) throws SQLException { } } } - - diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java index ad00d9b5a2492..eb33ea5038b98 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java @@ -14,19 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.math.BigDecimal; import java.math.RoundingMode; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.Decimal256Vector; /** - * Consumer which consume decimal type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.Decimal256Vector}. + * Consumer which consume decimal type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.Decimal256Vector}. */ public abstract class Decimal256Consumer extends BaseConsumer { private final RoundingMode bigDecimalRoundingMode; @@ -36,7 +34,7 @@ public abstract class Decimal256Consumer extends BaseConsumer * Constructs a new consumer. 
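The date branch above reduces a java.sql.Date (optionally resolved through the supplied Calendar) to whole days since the Unix epoch, which is what DateDayVector stores. The conversion in isolation, using only the JDK:

    import java.sql.Date;
    import java.util.concurrent.TimeUnit;

    public class DateToDaysSketch {
      public static void main(String[] args) {
        // Date.valueOf interprets the string in the JVM's default time zone, so
        // the result can differ by one day across zones; in UTC this prints 19844.
        long epochMillis = Date.valueOf("2024-05-01").getTime();
        int days = Math.toIntExact(TimeUnit.MILLISECONDS.toDays(epochMillis));
        System.out.println(days);
      }
    }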
* * @param vector the underlying vector for the consumer. - * @param index the column id for the consumer. + * @param index the column id for the consumer. */ public Decimal256Consumer(Decimal256Vector vector, int index) { this(vector, index, null); @@ -44,27 +42,23 @@ public Decimal256Consumer(Decimal256Vector vector, int index) { /** * Constructs a new consumer, with optional coercibility. + * * @param vector the underlying vector for the consumer. * @param index the column index for the consumer. - * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does not match that - * of the target vector. Set to null to retain strict matching behavior (scale of - * source and target vector must match exactly). + * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does + * not match that of the target vector. Set to null to retain strict matching behavior (scale + * of source and target vector must match exactly). */ - public Decimal256Consumer(Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { + public Decimal256Consumer( + Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index); this.bigDecimalRoundingMode = bigDecimalRoundingMode; this.scale = vector.getScale(); } - /** - * Creates a consumer for {@link Decimal256Vector}. - */ + /** Creates a consumer for {@link Decimal256Vector}. */ public static JdbcConsumer createConsumer( - Decimal256Vector vector, - int index, - boolean nullable, - RoundingMode bigDecimalRoundingMode - ) { + Decimal256Vector vector, int index, boolean nullable, RoundingMode bigDecimalRoundingMode) { if (nullable) { return new NullableDecimal256Consumer(vector, index, bigDecimalRoundingMode); } else { @@ -79,16 +73,12 @@ protected void set(BigDecimal value) { vector.set(currentIndex, value); } - - /** - * Consumer for nullable decimal. - */ + /** Consumer for nullable decimal. */ static class NullableDecimal256Consumer extends Decimal256Consumer { - /** - * Instantiate a Decimal256Consumer. - */ - public NullableDecimal256Consumer(Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a Decimal256Consumer. */ + public NullableDecimal256Consumer( + Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } @@ -104,15 +94,12 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Consumer for non-nullable decimal. - */ + /** Consumer for non-nullable decimal. */ static class NonNullableDecimal256Consumer extends Decimal256Consumer { - /** - * Instantiate a Decimal256Consumer. - */ - public NonNullableDecimal256Consumer(Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a Decimal256Consumer. 
*/ + public NonNullableDecimal256Consumer( + Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java index bed96dda8b65d..05b4d27de1022 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java @@ -14,19 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.math.BigDecimal; import java.math.RoundingMode; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.DecimalVector; /** - * Consumer which consume decimal type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.DecimalVector}. + * Consumer which consume decimal type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.DecimalVector}. */ public abstract class DecimalConsumer extends BaseConsumer { private final RoundingMode bigDecimalRoundingMode; @@ -36,7 +34,7 @@ public abstract class DecimalConsumer extends BaseConsumer { * Constructs a new consumer. * * @param vector the underlying vector for the consumer. - * @param index the column id for the consumer. + * @param index the column id for the consumer. */ public DecimalConsumer(DecimalVector vector, int index) { this(vector, index, null); @@ -44,11 +42,12 @@ public DecimalConsumer(DecimalVector vector, int index) { /** * Constructs a new consumer, with optional coercibility. + * * @param vector the underlying vector for the consumer. * @param index the column index for the consumer. - * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does not match that - * of the target vector. Set to null to retain strict matching behavior (scale of - * source and target vector must match exactly). + * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does + * not match that of the target vector. Set to null to retain strict matching behavior (scale + * of source and target vector must match exactly). */ public DecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index); @@ -56,15 +55,9 @@ public DecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalR this.scale = vector.getScale(); } - /** - * Creates a consumer for {@link DecimalVector}. - */ + /** Creates a consumer for {@link DecimalVector}. */ public static JdbcConsumer createConsumer( - DecimalVector vector, - int index, - boolean nullable, - RoundingMode bigDecimalRoundingMode - ) { + DecimalVector vector, int index, boolean nullable, RoundingMode bigDecimalRoundingMode) { if (nullable) { return new NullableDecimalConsumer(vector, index, bigDecimalRoundingMode); } else { @@ -79,16 +72,12 @@ protected void set(BigDecimal value) { vector.set(currentIndex, value); } - - /** - * Consumer for nullable decimal. - */ + /** Consumer for nullable decimal. */ static class NullableDecimalConsumer extends DecimalConsumer { - /** - * Instantiate a DecimalConsumer. 
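Both decimal consumers carry the same documented contract: with a null bigDecimalRoundingMode the incoming BigDecimal's scale must match the vector's scale exactly, while a non-null mode allows the value to be rescaled first. The rescaling step standalone, as an illustration of that contract rather than a copy of the consumer's internals:

    import java.math.BigDecimal;
    import java.math.RoundingMode;

    public class DecimalScaleSketch {
      public static void main(String[] args) {
        BigDecimal fromJdbc = new BigDecimal("12.345"); // scale 3 from the driver
        int vectorScale = 2;                            // scale of the target vector
        // With a rounding mode configured, the value can be coerced to the
        // vector's scale; with none, this mismatch would be an error instead.
        System.out.println(fromJdbc.setScale(vectorScale, RoundingMode.HALF_UP)); // 12.35
      }
    }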
- */ - public NullableDecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a DecimalConsumer. */ + public NullableDecimalConsumer( + DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } @@ -104,15 +93,12 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Consumer for non-nullable decimal. - */ + /** Consumer for non-nullable decimal. */ static class NonNullableDecimalConsumer extends DecimalConsumer { - /** - * Instantiate a DecimalConsumer. - */ - public NonNullableDecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a DecimalConsumer. */ + public NonNullableDecimalConsumer( + DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java index e3db95d1535af..9cd31e9245472 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.Float8Vector; /** - * Consumer which consume double type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.Float8Vector}. + * Consumer which consume double type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.Float8Vector}. */ public class DoubleConsumer { - /** - * Creates a consumer for {@link Float8Vector}. - */ - public static JdbcConsumer createConsumer(Float8Vector vector, int index, boolean nullable) { + /** Creates a consumer for {@link Float8Vector}. */ + public static JdbcConsumer createConsumer( + Float8Vector vector, int index, boolean nullable) { if (nullable) { return new NullableDoubleConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(Float8Vector vector, int } } - /** - * Nullable double consumer. - */ + /** Nullable double consumer. */ static class NullableDoubleConsumer extends BaseConsumer { - /** - * Instantiate a DoubleConsumer. - */ + /** Instantiate a DoubleConsumer. */ public NullableDoubleConsumer(Float8Vector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable double consumer. - */ + /** Non-nullable double consumer. */ static class NonNullableDoubleConsumer extends BaseConsumer { - /** - * Instantiate a DoubleConsumer. - */ + /** Instantiate a DoubleConsumer. 
*/ public NonNullableDoubleConsumer(Float8Vector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java index 830348fe94c6b..0f16a68da883e 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.Float4Vector; /** - * Consumer which consume float type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.Float4Vector}. + * Consumer which consume float type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.Float4Vector}. */ public class FloatConsumer { - /** - * Creates a consumer for {@link Float4Vector}. - */ - public static JdbcConsumer createConsumer(Float4Vector vector, int index, boolean nullable) { + /** Creates a consumer for {@link Float4Vector}. */ + public static JdbcConsumer createConsumer( + Float4Vector vector, int index, boolean nullable) { if (nullable) { return new NullableFloatConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(Float4Vector vector, int } } - /** - * Nullable float consumer. - */ + /** Nullable float consumer. */ static class NullableFloatConsumer extends BaseConsumer { - /** - * Instantiate a FloatConsumer. - */ + /** Instantiate a FloatConsumer. */ public NullableFloatConsumer(Float4Vector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable float consumer. - */ + /** Non-nullable float consumer. */ static class NonNullableFloatConsumer extends BaseConsumer { - /** - * Instantiate a FloatConsumer. - */ + /** Instantiate a FloatConsumer. */ public NonNullableFloatConsumer(Float4Vector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java index 4e537d682ff7c..302be697fbf07 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.IntVector; /** - * Consumer which consume int type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.IntVector}. + * Consumer which consume int type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.IntVector}. */ public class IntConsumer { - /** - * Creates a consumer for {@link IntVector}. - */ - public static JdbcConsumer createConsumer(IntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link IntVector}. 
*/ + public static JdbcConsumer createConsumer( + IntVector vector, int index, boolean nullable) { if (nullable) { return new NullableIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(IntVector vector, int index } } - /** - * Nullable consumer for int. - */ + /** Nullable consumer for int. */ static class NullableIntConsumer extends BaseConsumer { - /** - * Instantiate a IntConsumer. - */ + /** Instantiate a IntConsumer. */ public NullableIntConsumer(IntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for int. - */ + /** Non-nullable consumer for int. */ static class NonNullableIntConsumer extends BaseConsumer { - /** - * Instantiate a IntConsumer. - */ + /** Instantiate a IntConsumer. */ public NonNullableIntConsumer(IntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java index 480dfe3a1c57f..1ec6ad7eb9266 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java @@ -14,33 +14,27 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.ValueVector; /** * An abstraction that is used to consume values from {@link ResultSet}. + * * @param The vector within consumer or its delegate, used for partially consume purpose. */ public interface JdbcConsumer extends AutoCloseable { - /** - * Consume a specific type value from {@link ResultSet} and write it to vector. - */ + /** Consume a specific type value from {@link ResultSet} and write it to vector. */ void consume(ResultSet resultSet) throws SQLException, IOException; - /** - * Close this consumer, do some clean work such as clear reuse ArrowBuf. - */ + /** Close this consumer, do some clean work such as clear reuse ArrowBuf. */ + @Override void close() throws Exception; - /** - * Reset the vector within consumer for partial read purpose. - */ + /** Reset the vector within consumer for partial read purpose. */ void resetValueVector(T vector); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java index 07a071bfc096e..6223650ff2c04 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java @@ -14,46 +14,39 @@ * See the License for the specific language governing permissions and * limitations under the License. 
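The JdbcConsumer interface above (which now carries an explicit @Override on close) is the whole contract a custom consumer must satisfy: consume one value per call, clean up on close, and reset the vector for partial reads. A minimal hand-rolled implementation for IntVector, shown purely as a sketch of the extension point, not anything shipped by the adapter:

    import java.sql.ResultSet;
    import java.sql.SQLException;
    import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
    import org.apache.arrow.vector.IntVector;

    /** Illustrative consumer that negates int values as it copies them. */
    public class NegatingIntConsumer implements JdbcConsumer<IntVector> {
      private IntVector vector;
      private final int columnIndex;
      private int currentIndex;

      public NegatingIntConsumer(IntVector vector, int columnIndex) {
        this.vector = vector;
        this.columnIndex = columnIndex;
      }

      @Override
      public void consume(ResultSet resultSet) throws SQLException {
        int value = resultSet.getInt(columnIndex);
        if (!resultSet.wasNull()) {
          vector.setSafe(currentIndex, -value);
        }
        currentIndex++;
      }

      @Override
      public void close() throws Exception {
        vector.close();
      }

      @Override
      public void resetValueVector(IntVector vector) {
        this.vector = vector;
        this.currentIndex = 0;
      }
    }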
*/ - package org.apache.arrow.adapter.jdbc.consumer; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; import java.sql.SQLException; import java.util.Map; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.impl.UnionMapWriter; import org.apache.arrow.vector.util.ObjectMapperFactory; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; - /** - * Consumer which consume map type values from {@link ResultSet}. - * Write the data into {@link org.apache.arrow.vector.complex.MapVector}. + * Consumer which consume map type values from {@link ResultSet}. Write the data into {@link + * org.apache.arrow.vector.complex.MapVector}. */ public class MapConsumer extends BaseConsumer { - private final UnionMapWriter writer; private final ObjectMapper objectMapper = ObjectMapperFactory.newObjectMapper(); - private final TypeReference> typeReference = new TypeReference>() {}; + private final TypeReference> typeReference = + new TypeReference>() {}; private int currentRow; - /** - * Creates a consumer for {@link MapVector}. - */ + /** Creates a consumer for {@link MapVector}. */ public static MapConsumer createConsumer(MapVector mapVector, int index, boolean nullable) { return new MapConsumer(mapVector, index); } - /** - * Instantiate a MapConsumer. - */ + /** Instantiate a MapConsumer. */ public MapConsumer(MapVector vector, int index) { super(vector, index); writer = vector.getWriter(); @@ -69,7 +62,8 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { } else if (map instanceof Map) { writeJavaMapIntoVector((Map) map); } else { - throw new IllegalArgumentException("Unknown type of map type column from JDBC " + map.getClass().getName()); + throw new IllegalArgumentException( + "Unknown type of map type column from JDBC " + map.getClass().getName()); } } else { writer.writeNull(); @@ -79,26 +73,25 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { private void writeJavaMapIntoVector(Map map) { BufferAllocator allocator = vector.getAllocator(); writer.startMap(); - map.forEach((key, value) -> { - byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); - byte[] valueBytes = value != null ? value.getBytes(StandardCharsets.UTF_8) : null; - try ( - ArrowBuf keyBuf = allocator.buffer(keyBytes.length); - ArrowBuf valueBuf = valueBytes != null ? allocator.buffer(valueBytes.length) : null; - ) { - writer.startEntry(); - keyBuf.writeBytes(keyBytes); - writer.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); - if (valueBytes != null) { - valueBuf.writeBytes(valueBytes); - writer.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); - } else { - writer.value().varChar().writeNull(); - } - writer.endEntry(); - } - }); + map.forEach( + (key, value) -> { + byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); + byte[] valueBytes = value != null ? value.getBytes(StandardCharsets.UTF_8) : null; + try (ArrowBuf keyBuf = allocator.buffer(keyBytes.length); + ArrowBuf valueBuf = + valueBytes != null ? 
allocator.buffer(valueBytes.length) : null; ) { + writer.startEntry(); + keyBuf.writeBytes(keyBytes); + writer.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); + if (valueBytes != null) { + valueBuf.writeBytes(valueBytes); + writer.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); + } else { + writer.value().varChar().writeNull(); + } + writer.endEntry(); + } + }); writer.endMap(); } } - diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java index a79a029f45d06..9d7a760f697a7 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java @@ -14,17 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.NullVector; /** - * Consumer which consume null type values from ResultSet. - * Corresponding to {@link org.apache.arrow.vector.NullVector}. + * Consumer which consume null type values from ResultSet. Corresponding to {@link + * org.apache.arrow.vector.NullVector}. */ public class NullConsumer extends BaseConsumer { @@ -33,6 +31,5 @@ public NullConsumer(NullVector vector) { } @Override - public void consume(ResultSet resultSet) throws SQLException { - } + public void consume(ResultSet resultSet) throws SQLException {} } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java index 2edb3605b177a..9f45c077ed0a8 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.SmallIntVector; /** - * Consumer which consume smallInt type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.SmallIntVector}. + * Consumer which consume smallInt type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.SmallIntVector}. */ public class SmallIntConsumer { - /** - * Creates a consumer for {@link SmallIntVector}. - */ - public static BaseConsumer createConsumer(SmallIntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link SmallIntVector}. */ + public static BaseConsumer createConsumer( + SmallIntVector vector, int index, boolean nullable) { if (nullable) { return new NullableSmallIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static BaseConsumer createConsumer(SmallIntVector vector, } } - /** - * Nullable consumer for small int. - */ + /** Nullable consumer for small int. */ static class NullableSmallIntConsumer extends BaseConsumer { - /** - * Instantiate a SmallIntConsumer. - */ + /** Instantiate a SmallIntConsumer. 
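MapConsumer above accepts either a java.util.Map or a JSON string from ResultSet.getObject and, in the string case, parses it with Jackson into a Map<String, String> before writing entries through the UnionMapWriter. The parsing half in isolation (a plain ObjectMapper here, where the adapter goes through ObjectMapperFactory):

    import com.fasterxml.jackson.core.type.TypeReference;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import java.util.Map;

    public class MapColumnParseSketch {
      public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        Map<String, String> map =
            mapper.readValue(
                "{\"k1\":\"v1\",\"k2\":null}", new TypeReference<Map<String, String>>() {});
        // Null JSON values survive parsing and become null map entries in the vector.
        System.out.println(map); // {k1=v1, k2=null}
      }
    }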
*/ public NullableSmallIntConsumer(SmallIntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for small int. - */ + /** Non-nullable consumer for small int. */ static class NonNullableSmallIntConsumer extends BaseConsumer { - /** - * Instantiate a SmallIntConsumer. - */ + /** Instantiate a SmallIntConsumer. */ public NonNullableSmallIntConsumer(SmallIntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java index 4fa15ad79039e..bee19d0e4deab 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Time; import java.util.Calendar; - import org.apache.arrow.vector.TimeMilliVector; /** - * Consumer which consume time type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.TimeMilliVector}. + * Consumer which consume time type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.TimeMilliVector}. */ public abstract class TimeConsumer { - /** - * Creates a consumer for {@link TimeMilliVector}. - */ + /** Creates a consumer for {@link TimeMilliVector}. */ public static JdbcConsumer createConsumer( - TimeMilliVector vector, int index, boolean nullable, Calendar calendar) { + TimeMilliVector vector, int index, boolean nullable, Calendar calendar) { if (nullable) { return new NullableTimeConsumer(vector, index, calendar); } else { @@ -42,23 +38,17 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for {@link TimeMilliVector}. - */ + /** Nullable consumer for {@link TimeMilliVector}. */ static class NullableTimeConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NullableTimeConsumer(TimeMilliVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -66,8 +56,10 @@ public NullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar @Override public void consume(ResultSet resultSet) throws SQLException { - Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) : - resultSet.getTime(columnIndexInResultSet, calendar); + Time time = + calendar == null + ? resultSet.getTime(columnIndexInResultSet) + : resultSet.getTime(columnIndexInResultSet, calendar); if (!resultSet.wasNull()) { // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. @@ -77,23 +69,17 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for {@link TimeMilliVector}. - */ + /** Non-nullable consumer for {@link TimeMilliVector}. 
*/ static class NonNullableTimeConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NonNullableTimeConsumer(TimeMilliVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NonNullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -101,8 +87,10 @@ public NonNullableTimeConsumer(TimeMilliVector vector, int index, Calendar calen @Override public void consume(ResultSet resultSet) throws SQLException { - Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) : - resultSet.getTime(columnIndexInResultSet, calendar); + Time time = + calendar == null + ? resultSet.getTime(columnIndexInResultSet) + : resultSet.getTime(columnIndexInResultSet, calendar); // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. vector.set(currentIndex, (int) time.getTime()); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java index 3351e7e78a7e4..cc6269c21f04a 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java @@ -14,26 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; - import org.apache.arrow.vector.TimeStampMilliVector; /** - * Consumer which consume timestamp type values from {@link ResultSet}. - * Write the data to {@link TimeStampMilliVector}. + * Consumer which consume timestamp type values from {@link ResultSet}. Write the data to {@link + * TimeStampMilliVector}. */ public abstract class TimestampConsumer { - /** - * Creates a consumer for {@link TimeStampMilliVector}. - */ + /** Creates a consumer for {@link TimeStampMilliVector}. */ public static JdbcConsumer createConsumer( - TimeStampMilliVector vector, int index, boolean nullable) { + TimeStampMilliVector vector, int index, boolean nullable) { if (nullable) { return new NullableTimestampConsumer(vector, index); } else { @@ -41,14 +37,10 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for timestamp. - */ + /** Nullable consumer for timestamp. */ static class NullableTimestampConsumer extends BaseConsumer { - /** - * Instantiate a TimestampConsumer. - */ + /** Instantiate a TimestampConsumer. */ public NullableTimestampConsumer(TimeStampMilliVector vector, int index) { super(vector, index); } @@ -65,14 +57,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for timestamp. - */ + /** Non-nullable consumer for timestamp. */ static class NonNullableTimestampConsumer extends BaseConsumer { - /** - * Instantiate a TimestampConsumer. - */ + /** Instantiate a TimestampConsumer. 
*/ public NonNullableTimestampConsumer(TimeStampMilliVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java index f08671f0be61a..3e4911ac1a161 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java @@ -14,25 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; import java.util.Calendar; - import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.TimeStampMilliTZVector; /** - * Consumer which consume timestamp (with time zone) type values from {@link ResultSet}. - * Write the data to {@link TimeStampMilliTZVector}. + * Consumer which consume timestamp (with time zone) type values from {@link ResultSet}. Write the + * data to {@link TimeStampMilliTZVector}. */ public class TimestampTZConsumer { - /** - * Creates a consumer for {@link TimeStampMilliTZVector}. - */ + /** Creates a consumer for {@link TimeStampMilliTZVector}. */ public static JdbcConsumer createConsumer( TimeStampMilliTZVector vector, int index, boolean nullable, Calendar calendar) { Preconditions.checkArgument(calendar != null, "Calendar cannot be null"); @@ -43,17 +39,14 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for timestamp (with time zone). - */ + /** Nullable consumer for timestamp (with time zone). */ static class NullableTimestampTZConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimestampConsumer. - */ - public NullableTimestampTZConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) { + /** Instantiate a TimestampConsumer. */ + public NullableTimestampTZConsumer( + TimeStampMilliTZVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; } @@ -70,17 +63,14 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for timestamp (with time zone). - */ + /** Non-nullable consumer for timestamp (with time zone). */ static class NonNullableTimestampConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimestampConsumer. - */ - public NonNullableTimestampConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) { + /** Instantiate a TimestampConsumer. */ + public NonNullableTimestampConsumer( + TimeStampMilliTZVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java index 40cf087a5ec66..b75b87dd81cc4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.TinyIntVector; /** - * Consumer which consume tinyInt type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.TinyIntVector}. + * Consumer which consume tinyInt type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.TinyIntVector}. */ public abstract class TinyIntConsumer { - /** - * Creates a consumer for {@link TinyIntVector}. - */ - public static JdbcConsumer createConsumer(TinyIntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link TinyIntVector}. */ + public static JdbcConsumer createConsumer( + TinyIntVector vector, int index, boolean nullable) { if (nullable) { return new NullableTinyIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(TinyIntVector vector, i } } - /** - * Nullable consumer for tiny int. - */ + /** Nullable consumer for tiny int. */ static class NullableTinyIntConsumer extends BaseConsumer { - /** - * Instantiate a TinyIntConsumer. - */ + /** Instantiate a TinyIntConsumer. */ public NullableTinyIntConsumer(TinyIntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for tiny int. - */ + /** Non-nullable consumer for tiny int. */ static class NonNullableTinyIntConsumer extends BaseConsumer { - /** - * Instantiate a TinyIntConsumer. - */ + /** Instantiate a TinyIntConsumer. */ public NonNullableTinyIntConsumer(TinyIntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java index 05333715b8c2f..c81c4f0db124b 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java @@ -14,25 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.VarCharVector; /** - * Consumer which consume varchar type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.VarCharVector}. + * Consumer which consume varchar type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.VarCharVector}. */ public abstract class VarCharConsumer { - /** - * Creates a consumer for {@link VarCharVector}. - */ - public static JdbcConsumer createConsumer(VarCharVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link VarCharVector}. */ + public static JdbcConsumer createConsumer( + VarCharVector vector, int index, boolean nullable) { if (nullable) { return new NullableVarCharConsumer(vector, index); } else { @@ -40,14 +37,10 @@ public static JdbcConsumer createConsumer(VarCharVector vector, i } } - /** - * Nullable consumer for var char. - */ + /** Nullable consumer for var char. */ static class NullableVarCharConsumer extends BaseConsumer { - /** - * Instantiate a VarCharConsumer. - */ + /** Instantiate a VarCharConsumer. 
*/ public NullableVarCharConsumer(VarCharVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for var char. - */ + /** Non-nullable consumer for var char. */ static class NonNullableVarCharConsumer extends BaseConsumer { - /** - * Instantiate a VarCharConsumer. - */ + /** Instantiate a VarCharConsumer. */ public NonNullableVarCharConsumer(VarCharVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java index b235be173cf10..04e26d640c04d 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java @@ -14,15 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer.exceptions; import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; import org.apache.arrow.vector.types.pojo.ArrowType; /** - * Exception while consuming JDBC data. This exception stores the JdbcFieldInfo for the column and the - * ArrowType for the corresponding vector for easier debugging. + * Exception while consuming JDBC data. This exception stores the JdbcFieldInfo for the column and + * the ArrowType for the corresponding vector for easier debugging. */ public class JdbcConsumerException extends RuntimeException { final JdbcFieldInfo fieldInfo; @@ -31,12 +30,13 @@ public class JdbcConsumerException extends RuntimeException { /** * Construct JdbcConsumerException with all fields. * - * @param message error message - * @param cause original exception + * @param message error message + * @param cause original exception * @param fieldInfo JdbcFieldInfo for the column * @param arrowType ArrowType for the corresponding vector */ - public JdbcConsumerException(String message, Throwable cause, JdbcFieldInfo fieldInfo, ArrowType arrowType) { + public JdbcConsumerException( + String message, Throwable cause, JdbcFieldInfo fieldInfo, ArrowType arrowType) { super(message, cause); this.fieldInfo = fieldInfo; this.arrowType = arrowType; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java index 88a66a31aa2c9..1ad4492b35d18 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java @@ -14,9 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
  */
-
 package org.apache.arrow.adapter.jdbc;
 
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
 import java.io.IOException;
 import java.sql.Connection;
 import java.sql.DriverManager;
@@ -30,7 +31,6 @@
 import java.util.Map;
 import java.util.TimeZone;
 import java.util.function.Function;
-
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.util.Preconditions;
@@ -41,12 +41,7 @@
 import org.junit.Before;
 import org.junit.Test;
 
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
-
-/**
- * Class to abstract out some common test functionality for testing JDBC to Arrow.
- */
+/** Class to abstract out some common test functionality for testing JDBC to Arrow. */
 public abstract class AbstractJdbcToArrowTest {
 
   protected static final String BIGINT = "BIGINT_FIELD5";
@@ -69,7 +64,8 @@ public abstract class AbstractJdbcToArrowTest {
   protected static final String TINYINT = "TINYINT_FIELD3";
   protected static final String VARCHAR = "VARCHAR_FIELD13";
   protected static final String NULL = "NULL_FIELD18";
-  protected static final Map<String, JdbcFieldInfo> ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP = new HashMap<>();
+  protected static final Map<String, JdbcFieldInfo> ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP =
+      new HashMap<>();
 
   static {
     ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP.put(LIST, new JdbcFieldInfo(Types.INTEGER));
@@ -86,12 +82,12 @@ public abstract class AbstractJdbcToArrowTest {
    * @return Table object
    * @throws IOException on error
    */
-  protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss) throws IOException {
-    return new ObjectMapper(new YAMLFactory()).readValue(
-        clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class);
+  protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss)
+      throws IOException {
+    return new ObjectMapper(new YAMLFactory())
+        .readValue(clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class);
   }
 
-
   /**
    * This method creates Connection object and DB table and also populate data into table for test.
    *
@@ -105,7 +101,7 @@ public void setUp() throws SQLException, ClassNotFoundException {
     String driver = "org.h2.Driver";
     Class.forName(driver);
     conn = DriverManager.getConnection(url);
-    try (Statement stmt = conn.createStatement();) {
+    try (Statement stmt = conn.createStatement(); ) {
       stmt.executeUpdate(table.getCreate());
       for (String insert : table.getData()) {
         stmt.executeUpdate(insert);
@@ -136,12 +132,13 @@ public void destroy() throws SQLException {
    * @throws ClassNotFoundException on error
    * @throws IOException on error
    */
-  public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings("rawtypes") Class clss)
+  public static Object[][] prepareTestData(
+      String[] testFiles, @SuppressWarnings("rawtypes") Class clss)
       throws SQLException, ClassNotFoundException, IOException {
     Object[][] tableArr = new Object[testFiles.length][];
     int i = 0;
     for (String testFile : testFiles) {
-      tableArr[i++] = new Object[]{getTable(testFile, clss)};
+      tableArr[i++] = new Object[] {getTable(testFile, clss)};
     }
     return tableArr;
   }
@@ -159,86 +156,90 @@ public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings("
    * Abstract method to implement logic to assert test various datatype values.
    *
    * @param root VectorSchemaRoot for test
-   * @param isIncludeMapVector is this dataset checks includes map column.
-   *                           Jdbc type to 'map' mapping declared in configuration only manually
+   * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map'
+   *     mapping declared in configuration only manually
    */
   public abstract void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector);
 
   /**
-   * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
-   * This method uses the default Calendar instance with default TimeZone and Locale as returned by the JVM.
-   * If you wish to use specific TimeZone or Locale for any Date, Time and Timestamp datasets, you may want use
-   * overloaded API that taken Calendar object instance.
+   * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow
+   * objects. This method uses the default Calendar instance with default TimeZone and Locale as
+   * returned by the JVM. If you wish to use specific TimeZone or Locale for any Date, Time and
+   * Timestamp datasets, you may want use overloaded API that taken Calendar object instance.
    *
-   * This method is for test only.
+   * <p>This method is for test only.
    *
-   * @param connection Database connection to be used. This method will not close the passed connection object. Since
-   *                   the caller has passed the connection object it's the responsibility of the caller to close or
-   *                   return the connection to the pool.
-   * @param query     The DB Query to fetch the data.
-   * @param allocator Memory allocator
+   * @param connection Database connection to be used. This method will not close the passed
+   *     connection object. Since the caller has passed the connection object it's the
+   *     responsibility of the caller to close or return the connection to the pool.
+   * @param query The DB Query to fetch the data.
+   * @param allocator Memory allocator
    * @return Arrow Data Objects {@link VectorSchemaRoot}
-   * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
-   *                      ResultSet and Statement objects.
+   * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources
+   *     opened such as ResultSet and Statement objects.
    */
   public VectorSchemaRoot sqlToArrow(Connection connection, String query, BufferAllocator allocator)
       throws SQLException, IOException {
     Preconditions.checkNotNull(allocator, "Memory allocator object cannot be null");
 
-    JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar())
-        .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
-        .build();
+    JdbcToArrowConfig config =
+        new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar())
+            .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
+            .build();
     return sqlToArrow(connection, query, config);
   }
 
   /**
-   * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
+   * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow
+   * objects.
    *
-   * This method is for test only.
+   * <p>This method is for test only.
    *
-   * @param connection Database connection to be used. This method will not close the passed connection object. Since
-   *                   the caller has passed the connection object it's the responsibility of the caller to close or
-   *                   return the connection to the pool.
-   * @param query     The DB Query to fetch the data.
-   * @param allocator Memory allocator
-   * @param calendar  Calendar object to use to handle Date, Time and Timestamp datasets.
+   * @param connection Database connection to be used. This method will not close the passed
+   *     connection object. Since the caller has passed the connection object it's the
+   *     responsibility of the caller to close or return the connection to the pool.
+   * @param query The DB Query to fetch the data.
+   * @param allocator Memory allocator
+   * @param calendar Calendar object to use to handle Date, Time and Timestamp datasets.
    * @return Arrow Data Objects {@link VectorSchemaRoot}
-   * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
-   *                      ResultSet and Statement objects.
+   * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources
+   *     opened such as ResultSet and Statement objects.
    */
   public VectorSchemaRoot sqlToArrow(
-      Connection connection,
-      String query,
-      BufferAllocator allocator,
-      Calendar calendar) throws SQLException, IOException {
+      Connection connection, String query, BufferAllocator allocator, Calendar calendar)
+      throws SQLException, IOException {
     Preconditions.checkNotNull(allocator, "Memory allocator object cannot be null");
     Preconditions.checkNotNull(calendar, "Calendar object cannot be null");
 
-    JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, calendar)
-        .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
-        .build();
+    JdbcToArrowConfig config =
+        new JdbcToArrowConfigBuilder(allocator, calendar)
+            .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
+            .build();
     return sqlToArrow(connection, query, config);
   }
 
   /**
-   * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
+   * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow
+   * objects.
    *
-   * This method is for test only.
+   * <p>This method is for test only.
    *
-   * @param connection Database connection to be used. This method will not close the passed connection object.
-   *                   Since the caller has passed the connection object it's the responsibility of the caller
-   *                   to close or return the connection to the pool.
-   * @param query      The DB Query to fetch the data.
-   * @param config     Configuration
+   * @param connection Database connection to be used. This method will not close the passed
+   *     connection object. Since the caller has passed the connection object it's the
+   *     responsibility of the caller to close or return the connection to the pool.
+   * @param query The DB Query to fetch the data.
+   * @param config Configuration
    * @return Arrow Data Objects {@link VectorSchemaRoot}
-   * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
-   *                      ResultSet and Statement objects.
+   * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources
+   *     opened such as ResultSet and Statement objects.
    */
-  public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config)
+  public static VectorSchemaRoot sqlToArrow(
+      Connection connection, String query, JdbcToArrowConfig config)
      throws SQLException, IOException {
     Preconditions.checkNotNull(connection, "JDBC connection object cannot be null");
-    Preconditions.checkArgument(query != null && query.length() > 0, "SQL query cannot be null or empty");
+    Preconditions.checkArgument(
+        query != null && query.length() > 0, "SQL query cannot be null or empty");
 
     try (Statement stmt = connection.createStatement()) {
       return sqlToArrow(stmt.executeQuery(query), config);
@@ -246,10 +247,10 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, J
   }
 
   /**
-   * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. This
-   * method uses the default RootAllocator and Calendar object.
+   * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow
+   * objects. This method uses the default RootAllocator and Calendar object.
    *
-   * This method is for test only.
+   * <p>This method is for test only.
    *
    * @param resultSet ResultSet to use to fetch the data from underlying database
    * @return Arrow Data Objects {@link VectorSchemaRoot}
@@ -262,9 +263,10 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLExcepti
   }
 
   /**
-   * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
+   * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow
+   * objects.
    *
-   * This method is for test only.
+   * <p>This method is for test only.
    *
    * @param resultSet ResultSet to use to fetch the data from underlying database
    * @param allocator Memory allocator
@@ -275,62 +277,69 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator a
       throws SQLException, IOException {
     Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null");
 
-    JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar())
-        .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
-        .build();
+    JdbcToArrowConfig config =
+        new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar())
+            .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
+            .build();
     return sqlToArrow(resultSet, config);
   }
 
   /**
-   * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
+   * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow
+   * objects.
    *
-   * This method is for test only.
+   * <p>This method is for test only.
    *
    * @param resultSet ResultSet to use to fetch the data from underlying database
-   * @param calendar  Calendar instance to use for Date, Time and Timestamp datasets, or null if none.
+   * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null
+   *     if none.
    * @return Arrow Data Objects {@link VectorSchemaRoot}
    * @throws SQLException on error
    */
-  public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
+  public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar)
+      throws SQLException, IOException {
     Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null");
 
-    JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar)
-        .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
-        .build();
+    JdbcToArrowConfig config =
+        new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar)
+            .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
+            .build();
     return sqlToArrow(resultSet, config);
   }
 
   /**
-   * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
+   * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow
+   * objects.
    *
-   * This method is for test only.
+   * <p>This method is for test only.
    *
    * @param resultSet ResultSet to use to fetch the data from underlying database
    * @param allocator Memory allocator to use.
-   * @param calendar  Calendar instance to use for Date, Time and Timestamp datasets, or null if none.
+   * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null
+   *     if none.
    * @return Arrow Data Objects {@link VectorSchemaRoot}
    * @throws SQLException on error
    */
   public static VectorSchemaRoot sqlToArrow(
-      ResultSet resultSet,
-      BufferAllocator allocator,
-      Calendar calendar)
+      ResultSet resultSet, BufferAllocator allocator, Calendar calendar)
       throws SQLException, IOException {
     Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null");
 
-    JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, calendar)
-        .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
-        .build();
+    JdbcToArrowConfig config =
+        new JdbcToArrowConfigBuilder(allocator, calendar)
+            .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
+            .build();
     return sqlToArrow(resultSet, config);
   }
 
   /**
-   * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
+   * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow
+   * objects.
    *
-   * This method is for test only.
+   * <p>This method is for test only.
    *
    * @param resultSet ResultSet to use to fetch the data from underlying database
-   * @param config    Configuration of the conversion from JDBC to Arrow.
+   * @param config Configuration of the conversion from JDBC to Arrow.
    * @return Arrow Data Objects {@link VectorSchemaRoot}
    * @throws SQLException on error
    */
@@ -339,8 +348,10 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig
     Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null");
     Preconditions.checkNotNull(config, "The configuration cannot be null");
 
-    VectorSchemaRoot root = VectorSchemaRoot.create(
-        JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator());
+    VectorSchemaRoot root =
+        VectorSchemaRoot.create(
+            JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config),
+            config.getAllocator());
     if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
       ValueVectorUtility.preAllocate(root, config.getTargetBatchSize());
     }
@@ -350,12 +361,14 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig
 
   /**
    * Register MAP_FIELD20 as ArrowType.Map
-   * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null if none.
+   *
+   * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null
+   *     if none.
    * @param rsmd ResultSetMetaData to lookup column name from result set metadata
    * @return typeConverter instance with mapping column to Map type
    */
   protected Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter(
-        Calendar calendar, ResultSetMetaData rsmd) {
+      Calendar calendar, ResultSetMetaData rsmd) {
     return (jdbcFieldInfo) -> {
       String columnLabel = null;
       try {
@@ -377,5 +390,4 @@ protected Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter(
   protected ResultSetMetaData getQueryMetaData(String query) throws SQLException {
     return conn.createStatement().executeQuery(query).getMetaData();
   }
-
 }
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java
index b1a8b8f226753..cd6a78eae2b1a 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java
@@ -14,13 +14,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.adapter.jdbc;
 
 import static org.junit.Assert.*;
 
 import java.sql.Types;
-
 import org.junit.Test;
 
 public class JdbcFieldInfoTest {
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java
index 15b9ab0386159..a05130f18e4ac 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.adapter.jdbc;
 
 import static org.assertj.core.api.Assertions.assertThat;
@@ -32,7 +31,6 @@
 import java.util.List;
 import java.util.Map;
 import java.util.function.BiConsumer;
-
 import org.apache.arrow.adapter.jdbc.binder.ColumnBinder;
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
@@ -106,11 +104,11 @@ void bindOrder() throws SQLException {
                 Field.nullable("ints1", new ArrowType.Int(32, true)),
                 Field.nullable("ints2", new ArrowType.Int(32, true))));
     try (final MockPreparedStatement statement = new MockPreparedStatement();
-         final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+        final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
       final JdbcParameterBinder binder =
           JdbcParameterBinder.builder(statement, root)
-              .bind(/*paramIndex=*/ 1, /*colIndex=*/ 2)
-              .bind(/*paramIndex=*/ 2, /*colIndex=*/ 0)
+              .bind(/*parameterIndex=*/ 1, /*columnIndex=*/ 2)
+              .bind(/*parameterIndex=*/ 2, /*columnIndex=*/ 0)
               .build();
 
       assertThat(binder.next()).isFalse();
@@ -161,17 +159,17 @@ void bindOrder() throws SQLException {
   @Test
   void customBinder() throws SQLException {
     final Schema schema =
-        new Schema(Collections.singletonList(
-            Field.nullable("ints0", new ArrowType.Int(32, true))));
+        new Schema(Collections.singletonList(Field.nullable("ints0", new ArrowType.Int(32, true))));
 
     try (final MockPreparedStatement statement = new MockPreparedStatement();
-         final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+        final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
       final JdbcParameterBinder binder =
           JdbcParameterBinder.builder(statement, root)
              .bind(
-                  /*paramIndex=*/ 1,
+                  /*parameterIndex=*/ 1,
                   new ColumnBinder() {
                     private final IntVector vector = (IntVector) root.getVector(0);
+
                     @Override
                     public void bind(PreparedStatement statement, int parameterIndex, int rowIndex)
                         throws SQLException {
@@ -212,7 +210,9 @@ public FieldVector getVector() {
 
   @Test
   void bool() throws SQLException {
-    testSimpleType(ArrowType.Bool.INSTANCE, Types.BOOLEAN,
+    testSimpleType(
+        ArrowType.Bool.INSTANCE,
+        Types.BOOLEAN,
         (BitVector vector, Integer index, Boolean value) -> vector.setSafe(index, value ? 1 : 0),
         BitVector::setNull,
         Arrays.asList(true, false, true));
@@ -220,53 +220,76 @@ void bool() throws SQLException {
 
   @Test
   void int8() throws SQLException {
-    testSimpleType(new ArrowType.Int(8, true), Types.TINYINT,
-        TinyIntVector::setSafe, TinyIntVector::setNull,
+    testSimpleType(
+        new ArrowType.Int(8, true),
+        Types.TINYINT,
+        TinyIntVector::setSafe,
+        TinyIntVector::setNull,
         Arrays.asList(Byte.MAX_VALUE, Byte.MIN_VALUE, (byte) 42));
   }
 
   @Test
   void int16() throws SQLException {
-    testSimpleType(new ArrowType.Int(16, true), Types.SMALLINT,
-        SmallIntVector::setSafe, SmallIntVector::setNull,
+    testSimpleType(
+        new ArrowType.Int(16, true),
+        Types.SMALLINT,
+        SmallIntVector::setSafe,
+        SmallIntVector::setNull,
         Arrays.asList(Short.MAX_VALUE, Short.MIN_VALUE, (short) 42));
   }
 
   @Test
   void int32() throws SQLException {
-    testSimpleType(new ArrowType.Int(32, true), Types.INTEGER,
-        IntVector::setSafe, IntVector::setNull,
+    testSimpleType(
+        new ArrowType.Int(32, true),
+        Types.INTEGER,
+        IntVector::setSafe,
+        IntVector::setNull,
         Arrays.asList(Integer.MAX_VALUE, Integer.MIN_VALUE, 42));
   }
 
   @Test
   void int64() throws SQLException {
-    testSimpleType(new ArrowType.Int(64, true), Types.BIGINT,
-        BigIntVector::setSafe, BigIntVector::setNull,
+    testSimpleType(
+        new ArrowType.Int(64, true),
+        Types.BIGINT,
+        BigIntVector::setSafe,
+        BigIntVector::setNull,
         Arrays.asList(Long.MAX_VALUE, Long.MIN_VALUE, 42L));
   }
 
   @Test
   void float32() throws SQLException {
-    testSimpleType(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), Types.REAL,
-        Float4Vector::setSafe, Float4Vector::setNull,
+    testSimpleType(
+        new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE),
+        Types.REAL,
+        Float4Vector::setSafe,
+        Float4Vector::setNull,
         Arrays.asList(Float.MIN_VALUE, Float.MAX_VALUE, Float.POSITIVE_INFINITY));
   }
 
   @Test
   void float64() throws SQLException {
-    testSimpleType(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), Types.DOUBLE,
-        Float8Vector::setSafe, Float8Vector::setNull,
+    testSimpleType(
+        new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE),
+        Types.DOUBLE,
+        Float8Vector::setSafe,
+        Float8Vector::setNull,
         Arrays.asList(Double.MIN_VALUE, Double.MAX_VALUE, Double.POSITIVE_INFINITY));
   }
 
   @Test
   void time32() throws SQLException {
-    testSimpleType(new ArrowType.Time(TimeUnit.SECOND, 32), Types.TIME,
-        (valueVectors, index, value) -> valueVectors.setSafe(index, (int) (value.getTime() / 1_000)),
+    testSimpleType(
+        new ArrowType.Time(TimeUnit.SECOND, 32),
+        Types.TIME,
+        (valueVectors, index, value) ->
+            valueVectors.setSafe(index, (int) (value.getTime() / 1_000)),
         TimeSecVector::setNull,
         Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000)));
-    testSimpleType(new ArrowType.Time(TimeUnit.MILLISECOND, 32), Types.TIME,
+    testSimpleType(
+        new ArrowType.Time(TimeUnit.MILLISECOND, 32),
+        Types.TIME,
         (valueVectors, index, value) -> valueVectors.setSafe(index, (int) value.getTime()),
         TimeMilliVector::setNull,
         Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000)));
@@ -274,69 +297,108 @@ void time32() throws SQLException {
 
   @Test
   void time64() throws SQLException {
-    testSimpleType(new ArrowType.Time(TimeUnit.MICROSECOND, 64), Types.TIME,
-        (valueVectors, index, value) -> valueVectors.setSafe(index, (int) (value.getTime() * 1_000)),
+    testSimpleType(
+        new ArrowType.Time(TimeUnit.MICROSECOND, 64),
+        Types.TIME,
+        (valueVectors, index, value) -> valueVectors.setSafe(index, (value.getTime() * 1_000)),
         TimeMicroVector::setNull,
         Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000)));
-    testSimpleType(new ArrowType.Time(TimeUnit.NANOSECOND, 64), Types.TIME,
-        (valueVectors, index, value) -> valueVectors.setSafe(index, (int) (value.getTime() * 1_000_000)),
+    testSimpleType(
+        new ArrowType.Time(TimeUnit.NANOSECOND, 64),
+        Types.TIME,
+        (valueVectors, index, value) -> valueVectors.setSafe(index, (value.getTime() * 1_000_000)),
         TimeNanoVector::setNull,
         Arrays.asList(new Time(-128), new Time(104), new Time(-42)));
   }
 
   @Test
   void date32() throws SQLException {
-    testSimpleType(new ArrowType.Date(DateUnit.DAY), Types.DATE,
-        (valueVectors, index, value) -> valueVectors.setSafe(index, (int) (value.getTime() / MILLIS_PER_DAY)),
+    testSimpleType(
+        new ArrowType.Date(DateUnit.DAY),
+        Types.DATE,
+        (valueVectors, index, value) ->
+            valueVectors.setSafe(index, (int) (value.getTime() / MILLIS_PER_DAY)),
         DateDayVector::setNull,
-        Arrays.asList(new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY)));
+        Arrays.asList(
+            new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY)));
   }
 
   @Test
   void date64() throws SQLException {
-    testSimpleType(new ArrowType.Date(DateUnit.MILLISECOND), Types.DATE,
+    testSimpleType(
+        new ArrowType.Date(DateUnit.MILLISECOND),
+        Types.DATE,
         (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()),
         DateMilliVector::setNull,
-        Arrays.asList(new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY)));
+        Arrays.asList(
+            new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY)));
   }
 
   @Test
   void timestamp() throws SQLException {
-    List<Timestamp> values = Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000));
-    testSimpleType(new ArrowType.Timestamp(TimeUnit.SECOND, null), Types.TIMESTAMP,
+    List<Timestamp> values =
+        Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000));
+    testSimpleType(
+        new ArrowType.Timestamp(TimeUnit.SECOND, null),
+        Types.TIMESTAMP,
         (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() / 1_000),
-        TimeStampSecVector::setNull, values);
-    testSimpleType(new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), Types.TIMESTAMP,
+        TimeStampSecVector::setNull,
+        values);
+    testSimpleType(
+        new ArrowType.Timestamp(TimeUnit.MILLISECOND, null),
+        Types.TIMESTAMP,
         (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()),
-        TimeStampMilliVector::setNull, values);
-    testSimpleType(new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), Types.TIMESTAMP,
+        TimeStampMilliVector::setNull,
+        values);
+    testSimpleType(
+        new ArrowType.Timestamp(TimeUnit.MICROSECOND, null),
+        Types.TIMESTAMP,
         (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000),
-        TimeStampMicroVector::setNull, values);
-    testSimpleType(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), Types.TIMESTAMP,
+        TimeStampMicroVector::setNull,
+        values);
+    testSimpleType(
+        new ArrowType.Timestamp(TimeUnit.NANOSECOND, null),
+        Types.TIMESTAMP,
         (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000_000),
-        TimeStampNanoVector::setNull, values);
+        TimeStampNanoVector::setNull,
+        values);
   }
 
   @Test
   void timestampTz() throws SQLException {
-    List<Timestamp> values = Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000));
-    testSimpleType(new ArrowType.Timestamp(TimeUnit.SECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE,
+    List<Timestamp> values =
+        Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000));
+    testSimpleType(
+        new ArrowType.Timestamp(TimeUnit.SECOND, "UTC"),
+        Types.TIMESTAMP_WITH_TIMEZONE,
         (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() / 1_000),
-        TimeStampSecTZVector::setNull, values);
-    testSimpleType(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE,
+        TimeStampSecTZVector::setNull,
+        values);
+    testSimpleType(
+        new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"),
+        Types.TIMESTAMP_WITH_TIMEZONE,
         (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()),
-        TimeStampMilliTZVector::setNull, values);
-    testSimpleType(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE,
+        TimeStampMilliTZVector::setNull,
+        values);
+    testSimpleType(
+        new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"),
+        Types.TIMESTAMP_WITH_TIMEZONE,
         (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000),
-        TimeStampMicroTZVector::setNull, values);
-    testSimpleType(new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE,
+        TimeStampMicroTZVector::setNull,
+        values);
+    testSimpleType(
+        new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"),
+        Types.TIMESTAMP_WITH_TIMEZONE,
         (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000_000),
-        TimeStampNanoTZVector::setNull, values);
+        TimeStampNanoTZVector::setNull,
+        values);
   }
 
   @Test
   void utf8() throws SQLException {
-    testSimpleType(ArrowType.Utf8.INSTANCE, Types.VARCHAR,
+    testSimpleType(
+        ArrowType.Utf8.INSTANCE,
+        Types.VARCHAR,
         (VarCharVector vector, Integer index, String value) ->
             vector.setSafe(index, value.getBytes(StandardCharsets.UTF_8)),
         BaseVariableWidthVector::setNull,
@@ -345,7 +407,9 @@ void utf8() throws SQLException {
 
   @Test
   void largeUtf8() throws SQLException {
-    testSimpleType(ArrowType.LargeUtf8.INSTANCE, Types.LONGVARCHAR,
+    testSimpleType(
+        ArrowType.LargeUtf8.INSTANCE,
+        Types.LONGVARCHAR,
         (LargeVarCharVector vector, Integer index, String value) ->
             vector.setSafe(index, value.getBytes(StandardCharsets.UTF_8)),
         BaseLargeVariableWidthVector::setNull,
@@ -354,155 +418,200 @@ void largeUtf8() throws SQLException {
 
   @Test
   void binary() throws SQLException {
-    testSimpleType(ArrowType.Binary.INSTANCE, Types.VARBINARY,
-        (VarBinaryVector vector, Integer index, byte[] value) ->
-            vector.setSafe(index, value),
+    testSimpleType(
+        ArrowType.Binary.INSTANCE,
+        Types.VARBINARY,
+        (VarBinaryVector vector, Integer index, byte[] value) -> vector.setSafe(index, value),
         BaseVariableWidthVector::setNull,
         Arrays.asList(new byte[0], new byte[] {2, -4}, new byte[] {0, -1, 127, -128}));
   }
 
   @Test
   void largeBinary() throws SQLException {
-    testSimpleType(ArrowType.LargeBinary.INSTANCE, Types.LONGVARBINARY,
-        (LargeVarBinaryVector vector, Integer index, byte[] value) ->
-            vector.setSafe(index, value),
+    testSimpleType(
+        ArrowType.LargeBinary.INSTANCE,
+        Types.LONGVARBINARY,
+        (LargeVarBinaryVector vector, Integer index, byte[] value) -> vector.setSafe(index, value),
         BaseLargeVariableWidthVector::setNull,
         Arrays.asList(new byte[0], new byte[] {2, -4}, new byte[] {0, -1, 127, -128}));
   }
 
   @Test
   void fixedSizeBinary() throws SQLException {
-    testSimpleType(new ArrowType.FixedSizeBinary(3), Types.BINARY,
-        FixedSizeBinaryVector::setSafe, FixedSizeBinaryVector::setNull,
+    testSimpleType(
+        new ArrowType.FixedSizeBinary(3),
+        Types.BINARY,
+        FixedSizeBinaryVector::setSafe,
+        FixedSizeBinaryVector::setNull,
         Arrays.asList(new byte[3], new byte[] {1, 2, -4}, new byte[] {-1, 127, -128}));
   }
 
   @Test
   void decimal128() throws SQLException {
-    testSimpleType(new ArrowType.Decimal(/*precision*/ 12, /*scale*/3, 128), Types.DECIMAL,
-        DecimalVector::setSafe, DecimalVector::setNull,
-        Arrays.asList(new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000")));
+    testSimpleType(
+        new ArrowType.Decimal(/*precision*/ 12, /*scale*/ 3, 128),
+        Types.DECIMAL,
+        DecimalVector::setSafe,
+        DecimalVector::setNull,
+        Arrays.asList(
+            new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000")));
   }
 
   @Test
   void decimal256() throws SQLException {
-    testSimpleType(new ArrowType.Decimal(/*precision*/ 12, /*scale*/3, 256), Types.DECIMAL,
-        Decimal256Vector::setSafe, Decimal256Vector::setNull,
-        Arrays.asList(new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000")));
+    testSimpleType(
+        new ArrowType.Decimal(/*precision*/ 12, /*scale*/ 3, 256),
+        Types.DECIMAL,
+        Decimal256Vector::setSafe,
+        Decimal256Vector::setNull,
+        Arrays.asList(
+            new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000")));
   }
 
   @Test
   void listOfDouble() throws SQLException {
-    TriConsumer<ListVector, Integer, Double[]> setValue = (listVector, index, values) -> {
-      org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter();
-      writer.setPosition(index);
-      writer.startList();
-      Arrays.stream(values).forEach(doubleValue -> writer.float8().writeFloat8(doubleValue));
-      writer.endList();
-      listVector.setLastSet(index);
-    };
-    List<Double[]> values = Arrays.asList(new Double[]{0.0, Math.PI}, new Double[]{1.1, -352346.2, 2355.6},
-        new Double[]{-1024.3}, new Double[]{});
-    testListType(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), setValue, ListVector::setNull, values);
+    TriConsumer<ListVector, Integer, Double[]> setValue =
+        (listVector, index, values) -> {
+          org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter();
+          writer.setPosition(index);
+          writer.startList();
+          Arrays.stream(values).forEach(doubleValue -> writer.float8().writeFloat8(doubleValue));
+          writer.endList();
+          listVector.setLastSet(index);
+        };
+    List<Double[]> values =
+        Arrays.asList(
+            new Double[] {0.0, Math.PI}, new Double[] {1.1, -352346.2, 2355.6},
+            new Double[] {-1024.3}, new Double[] {});
+    testListType(
+        new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE),
+        setValue,
+        ListVector::setNull,
+        values);
   }
 
   @Test
   void listOfInt64() throws SQLException {
-    TriConsumer<ListVector, Integer, Long[]> setValue = (listVector, index, values) -> {
-      org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter();
-      writer.setPosition(index);
-      writer.startList();
-      Arrays.stream(values).forEach(longValue -> writer.bigInt().writeBigInt(longValue));
-      writer.endList();
-      listVector.setLastSet(index);
-    };
-    List<Long[]> values = Arrays.asList(new Long[]{1L, 2L, 3L}, new Long[]{4L, 5L},
-        new Long[]{512L, 1024L, 2048L, 4096L}, new Long[]{});
+    TriConsumer<ListVector, Integer, Long[]> setValue =
+        (listVector, index, values) -> {
+          org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter();
+          writer.setPosition(index);
+          writer.startList();
+          Arrays.stream(values).forEach(longValue -> writer.bigInt().writeBigInt(longValue));
+          writer.endList();
+          listVector.setLastSet(index);
+        };
+    List<Long[]> values =
+        Arrays.asList(
+            new Long[] {1L, 2L, 3L},
+            new Long[] {4L, 5L},
+            new Long[] {512L, 1024L, 2048L, 4096L},
+            new Long[] {});
     testListType((ArrowType) new ArrowType.Int(64, true), setValue, ListVector::setNull, values);
   }
 
   @Test
   void listOfInt32() throws SQLException {
-    TriConsumer<ListVector, Integer, Integer[]> setValue = (listVector, index, values) -> {
-      org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter();
-      writer.setPosition(index);
-      writer.startList();
-      Arrays.stream(values).forEach(integerValue -> writer.integer().writeInt(integerValue));
-      writer.endList();
-      listVector.setLastSet(index);
-    };
-    List<Integer[]> values = Arrays.asList(new Integer[]{1, 2, 3}, new Integer[]{4, 5},
-        new Integer[]{512, 1024, 2048, 4096}, new Integer[]{});
+    TriConsumer<ListVector, Integer, Integer[]> setValue =
+        (listVector, index, values) -> {
+          org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter();
+          writer.setPosition(index);
+          writer.startList();
+          Arrays.stream(values).forEach(integerValue -> writer.integer().writeInt(integerValue));
+          writer.endList();
+          listVector.setLastSet(index);
+        };
+    List<Integer[]> values =
+        Arrays.asList(
+            new Integer[] {1, 2, 3},
+            new Integer[] {4, 5},
+            new Integer[] {512, 1024, 2048, 4096},
+            new Integer[] {});
     testListType((ArrowType) new ArrowType.Int(32, true), setValue, ListVector::setNull, values);
   }
 
   @Test
   void listOfBoolean() throws SQLException {
-    TriConsumer<ListVector, Integer, Boolean[]> setValue = (listVector, index, values) -> {
-      org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter();
-      writer.setPosition(index);
-      writer.startList();
-      Arrays.stream(values).forEach(booleanValue -> writer.bit().writeBit(booleanValue ? 1 : 0));
-      writer.endList();
-      listVector.setLastSet(index);
-    };
-    List<Boolean[]> values = Arrays.asList(new Boolean[]{true, false},
-        new Boolean[]{false, false}, new Boolean[]{true, true, false, true}, new Boolean[]{});
+    TriConsumer<ListVector, Integer, Boolean[]> setValue =
+        (listVector, index, values) -> {
+          org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter();
+          writer.setPosition(index);
+          writer.startList();
+          Arrays.stream(values)
+              .forEach(booleanValue -> writer.bit().writeBit(booleanValue ? 1 : 0));
+          writer.endList();
+          listVector.setLastSet(index);
+        };
+    List<Boolean[]> values =
+        Arrays.asList(
+            new Boolean[] {true, false},
+            new Boolean[] {false, false},
+            new Boolean[] {true, true, false, true},
+            new Boolean[] {});
     testListType((ArrowType) new ArrowType.Bool(), setValue, ListVector::setNull, values);
   }
 
   @Test
   void listOfString() throws SQLException {
-    TriConsumer<ListVector, Integer, String[]> setValue = (listVector, index, values) -> {
-      org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter();
-      writer.setPosition(index);
-      writer.startList();
-      Arrays.stream(values).forEach(stringValue -> {
-        if (stringValue != null) {
-          byte[] stringValueBytes = stringValue.getBytes(StandardCharsets.UTF_8);
-          try (ArrowBuf stringBuffer = allocator.buffer(stringValueBytes.length)) {
-            stringBuffer.writeBytes(stringValueBytes);
-            writer.varChar().writeVarChar(0, stringValueBytes.length, stringBuffer);
-          }
-        } else {
-          writer.varChar().writeNull();
-        }
-      });
-      writer.endList();
-      listVector.setLastSet(index);
-    };
-    List<String[]> values = Arrays.asList(new String[]{"aaaa", "b1"},
-        new String[]{"c", null, "d"}, new String[]{"e", "f", "g", "h"}, new String[]{});
+    TriConsumer<ListVector, Integer, String[]> setValue =
+        (listVector, index, values) -> {
+          org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter();
+          writer.setPosition(index);
+          writer.startList();
+          Arrays.stream(values)
+              .forEach(
+                  stringValue -> {
+                    if (stringValue != null) {
+                      byte[] stringValueBytes = stringValue.getBytes(StandardCharsets.UTF_8);
+                      try (ArrowBuf stringBuffer = allocator.buffer(stringValueBytes.length)) {
+                        stringBuffer.writeBytes(stringValueBytes);
+                        writer.varChar().writeVarChar(0, stringValueBytes.length, stringBuffer);
+                      }
+                    } else {
+                      writer.varChar().writeNull();
+                    }
+                  });
+          writer.endList();
+          listVector.setLastSet(index);
+        };
    List<String[]> values =
+        Arrays.asList(
+            new String[] {"aaaa", "b1"},
+            new String[] {"c", null, "d"},
+            new String[] {"e", "f", "g", "h"},
+            new String[] {});
     testListType((ArrowType) new ArrowType.Utf8(), setValue, ListVector::setNull, values);
   }
 
   @Test
   void mapOfString() throws SQLException {
-    TriConsumer<MapVector, Integer, Map<String, String>> setValue = (mapVector, index, values) -> {
-      org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter();
-      mapWriter.setPosition(index);
-      mapWriter.startMap();
-      values.entrySet().forEach(mapValue -> {
-        if (mapValue != null) {
-          byte[] keyBytes = mapValue.getKey().getBytes(StandardCharsets.UTF_8);
-          byte[] valueBytes = mapValue.getValue().getBytes(StandardCharsets.UTF_8);
-          try (
-              ArrowBuf keyBuf = allocator.buffer(keyBytes.length);
-              ArrowBuf valueBuf = allocator.buffer(valueBytes.length);
-          ) {
-            mapWriter.startEntry();
-            keyBuf.writeBytes(keyBytes);
-            valueBuf.writeBytes(valueBytes);
-            mapWriter.key().varChar().writeVarChar(0, keyBytes.length, keyBuf);
-            mapWriter.value().varChar().writeVarChar(0, valueBytes.length, valueBuf);
-            mapWriter.endEntry();
-          }
-        } else {
-          mapWriter.writeNull();
-        }
-      });
-      mapWriter.endMap();
-    };
+    TriConsumer<MapVector, Integer, Map<String, String>> setValue =
+        (mapVector, index, values) -> {
+          org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter();
+          mapWriter.setPosition(index);
+          mapWriter.startMap();
+          values
+              .entrySet()
+              .forEach(
+                  mapValue -> {
+                    if (mapValue != null) {
+                      byte[] keyBytes = mapValue.getKey().getBytes(StandardCharsets.UTF_8);
+                      byte[] valueBytes = mapValue.getValue().getBytes(StandardCharsets.UTF_8);
+                      try (ArrowBuf keyBuf = allocator.buffer(keyBytes.length);
+                          ArrowBuf valueBuf = allocator.buffer(valueBytes.length); ) {
+                        mapWriter.startEntry();
+                        keyBuf.writeBytes(keyBytes);
+                        valueBuf.writeBytes(valueBytes);
+                        mapWriter.key().varChar().writeVarChar(0, keyBytes.length, keyBuf);
+                        mapWriter.value().varChar().writeVarChar(0, valueBytes.length, valueBuf);
+                        mapWriter.endEntry();
+                      }
+                    } else {
+                      mapWriter.writeNull();
+                    }
+                  });
+          mapWriter.endMap();
+        };
 
     JsonStringHashMap<String, String> value1 = new JsonStringHashMap<>();
     value1.put("a", "b");
@@ -514,28 +623,34 @@ void mapOfString() throws SQLException {
     JsonStringHashMap<String, String> value3 = new JsonStringHashMap<>();
     value3.put("y", "z");
     value3.put("arrow", "cool");
-    List<Map<String, String>> values = Arrays.asList(value1, value2, value3, Collections.emptyMap());
-    testMapType(new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Utf8());
+    List<Map<String, String>> values =
+        Arrays.asList(value1, value2, value3, Collections.emptyMap());
+    testMapType(
+        new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Utf8());
   }
 
   @Test
   void mapOfInteger() throws SQLException {
-    TriConsumer<MapVector, Integer, Map<Integer, Integer>> setValue = (mapVector, index, values) -> {
-      org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter();
-      mapWriter.setPosition(index);
-      mapWriter.startMap();
-      values.entrySet().forEach(mapValue -> {
-        if (mapValue != null) {
-          mapWriter.startEntry();
-          mapWriter.key().integer().writeInt(mapValue.getKey());
-          mapWriter.value().integer().writeInt(mapValue.getValue());
-          mapWriter.endEntry();
-        } else {
-          mapWriter.writeNull();
-        }
-      });
-      mapWriter.endMap();
-    };
+    TriConsumer<MapVector, Integer, Map<Integer, Integer>> setValue =
+        (mapVector, index, values) -> {
+          org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter();
+          mapWriter.setPosition(index);
+          mapWriter.startMap();
+          values
+              .entrySet()
+              .forEach(
+                  mapValue -> {
+                    if (mapValue != null) {
+                      mapWriter.startEntry();
+                      mapWriter.key().integer().writeInt(mapValue.getKey());
+                      mapWriter.value().integer().writeInt(mapValue.getValue());
+                      mapWriter.endEntry();
+                    } else {
+                      mapWriter.writeNull();
+                    }
+                  });
+          mapWriter.endMap();
+        };
 
     JsonStringHashMap<Integer, Integer> value1 = new JsonStringHashMap<>();
     value1.put(1, 2);
@@ -547,8 +662,10 @@ void mapOfInteger() throws SQLException {
     JsonStringHashMap<Integer, Integer> value3 = new JsonStringHashMap<>();
     value3.put(Integer.MIN_VALUE, Integer.MAX_VALUE);
     value3.put(0, 4096);
-    List<Map<Integer, Integer>> values = Arrays.asList(value1, value2, value3, Collections.emptyMap());
-    testMapType(new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Int(32, true));
+    List<Map<Integer, Integer>> values =
+        Arrays.asList(value1, value2, value3, Collections.emptyMap());
+    testMapType(
+        new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Int(32, true));
   }
 
   @FunctionalInterface
@@ -556,11 +673,16 @@ interface TriConsumer {
     void accept(T value1, U value2, V value3);
   }
 
-  void testSimpleType(ArrowType arrowType, int jdbcType, TriConsumer setValue,
-      BiConsumer setNull, List values) throws SQLException {
+  void testSimpleType(
+      ArrowType arrowType,
+      int jdbcType,
+      TriConsumer setValue,
+      BiConsumer setNull,
+      List values)
+      throws SQLException {
     Schema schema = new Schema(Collections.singletonList(Field.nullable("field", arrowType)));
     try (final MockPreparedStatement statement = new MockPreparedStatement();
-         final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+        final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
      final JdbcParameterBinder binder =
           JdbcParameterBinder.builder(statement, root).bindAll().build();
       assertThat(binder.next()).isFalse();
@@ -610,7 +732,7 @@ void testSimpleType(ArrowType arrowType, int jdbcType
     // Non-nullable (since some types have a specialized binder)
     schema = new Schema(Collections.singletonList(Field.notNullable("field", arrowType)));
     try (final MockPreparedStatement statement = new MockPreparedStatement();
-         final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+        final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
       final JdbcParameterBinder binder =
           JdbcParameterBinder.builder(statement, root).bindAll().build();
       assertThat(binder.next()).isFalse();
@@ -650,15 +772,23 @@ void testSimpleType(ArrowType arrowType, int jdbcType
     }
   }
 
-  void testListType(ArrowType arrowType, TriConsumer setValue,
-      BiConsumer setNull, List values) throws SQLException {
+  void testListType(
+      ArrowType arrowType,
+      TriConsumer setValue,
+      BiConsumer setNull,
+      List values)
+      throws SQLException {
     int jdbcType = Types.ARRAY;
-    Schema schema = new Schema(Collections.singletonList(new Field("field", FieldType.nullable(
-        new ArrowType.List()), Collections.singletonList(
-            new Field("element", FieldType.notNullable(arrowType), null)
-    ))));
+    Schema schema =
+        new Schema(
+            Collections.singletonList(
+                new Field(
+                    "field",
+                    FieldType.nullable(new ArrowType.List()),
+                    Collections.singletonList(
+                        new Field("element", FieldType.notNullable(arrowType), null)))));
     try (final MockPreparedStatement statement = new MockPreparedStatement();
-         final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+        final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
       final JdbcParameterBinder binder =
           JdbcParameterBinder.builder(statement, root).bindAll().build();
       assertThat(binder.next()).isFalse();
@@ -706,12 +836,16 @@ void testListType(ArrowType arrowType, TriConsumer
 void testListType(ArrowType arrowType, TriConsumer
 
-  void testMapType(ArrowType arrowType, TriConsumer setValue,
-      BiConsumer setNull, List values,
-      ArrowType elementType) throws SQLException {
+  void testMapType(
+      ArrowType arrowType,
+      TriConsumer setValue,
+      BiConsumer setNull,
+      List values,
+      ArrowType elementType)
+      throws SQLException {
     int jdbcType = Types.VARCHAR;
     FieldType keyType = new FieldType(false, elementType, null, null);
     FieldType mapType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
-    Schema schema = new Schema(Collections.singletonList(new Field("field", FieldType.nullable(arrowType),
-        Collections.singletonList(new Field(MapVector.KEY_NAME, mapType,
-            Arrays.asList(new Field(MapVector.KEY_NAME, keyType, null),
-                new Field(MapVector.VALUE_NAME, keyType, null)))))));
+    Schema schema =
+        new Schema(
+            Collections.singletonList(
+                new Field(
+                    "field",
+                    FieldType.nullable(arrowType),
+                    Collections.singletonList(
+                        new Field(
+                            MapVector.KEY_NAME,
+                            mapType,
+                            Arrays.asList(
+                                new Field(MapVector.KEY_NAME, keyType, null),
+                                new Field(MapVector.VALUE_NAME, keyType, null)))))));
     try (final MockPreparedStatement statement = new MockPreparedStatement();
-         final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+        final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
      final JdbcParameterBinder binder =
           JdbcParameterBinder.builder(statement, root).bindAll().build();
       assertThat(binder.next()).isFalse();
@@ -810,18 +957,31 @@ void testMapType(ArrowType arrowType, TriConsumer
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java
-  private static Field field(String name, boolean nullable, ArrowType type, Map<String, String> metadata) {
+  private static Field field(
+      String name, boolean nullable, ArrowType type, Map<String, String> metadata) {
     return new Field(name, new FieldType(nullable, type, null, metadata), Collections.emptyList());
   }
@@ -94,16 +91,26 @@ private static Map<String, String> metadata(String... entries) {
   public void schemaComment() throws Exception {
     boolean includeMetadata = false;
     Schema schema = getSchemaWithCommentFromQuery(includeMetadata);
-    Schema expectedSchema = new Schema(Arrays.asList(
-        field("ID", false, Types.MinorType.BIGINT.getType(),
-            metadata("comment", "Record identifier")),
-        field("NAME", true, Types.MinorType.VARCHAR.getType(),
-            metadata("comment", "Name of record")),
-        field("COLUMN1", true, Types.MinorType.BIT.getType(),
-            metadata()),
-        field("COLUMNN", true, Types.MinorType.INT.getType(),
-            metadata("comment", "Informative description of columnN"))
-    ), metadata("comment", "This is super special table with valuable data"));
+    Schema expectedSchema =
+        new Schema(
+            Arrays.asList(
+                field(
+                    "ID",
+                    false,
+                    Types.MinorType.BIGINT.getType(),
+                    metadata("comment", "Record identifier")),
+                field(
+                    "NAME",
+                    true,
+                    Types.MinorType.VARCHAR.getType(),
+                    metadata("comment", "Name of record")),
+                field("COLUMN1", true, Types.MinorType.BIT.getType(), metadata()),
+                field(
+                    "COLUMNN",
+                    true,
+                    Types.MinorType.INT.getType(),
+                    metadata("comment", "Informative description of columnN"))),
+            metadata("comment", "This is super special table with valuable data"));
     assertThat(schema).isEqualTo(expectedSchema);
   }
 
@@ -111,47 +118,60 @@ public void schemaComment() throws Exception {
   public void schemaCommentWithDatabaseMetadata() throws Exception {
     boolean includeMetadata = true;
     Schema schema = getSchemaWithCommentFromQuery(includeMetadata);
-    Schema expectedSchema = new Schema(Arrays.asList(
-        field("ID", false, Types.MinorType.BIGINT.getType(),
-            metadata(
-                "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8",
-                "SQL_SCHEMA_NAME", "PUBLIC",
-                "SQL_TABLE_NAME", "TABLE1",
-                "SQL_COLUMN_NAME", "ID",
-                "SQL_TYPE", "BIGINT",
-                "comment", "Record identifier"
-            )),
-        field("NAME", true, Types.MinorType.VARCHAR.getType(),
-            metadata(
-                "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8",
-                "SQL_SCHEMA_NAME", "PUBLIC",
-                "SQL_TABLE_NAME", "TABLE1",
-                "SQL_COLUMN_NAME", "NAME",
-                "SQL_TYPE", "CHARACTER VARYING",
-                "comment", "Name of record")),
-        field("COLUMN1", true, Types.MinorType.BIT.getType(),
-            metadata(
-                "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8",
-                "SQL_SCHEMA_NAME", "PUBLIC",
-                "SQL_TABLE_NAME", "TABLE1",
-                "SQL_COLUMN_NAME", "COLUMN1",
-                "SQL_TYPE", "BOOLEAN")),
-        field("COLUMNN", true, Types.MinorType.INT.getType(),
-            metadata(
-                "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8",
-                "SQL_SCHEMA_NAME", "PUBLIC",
-                "SQL_TABLE_NAME", "TABLE1",
-                "SQL_COLUMN_NAME", "COLUMNN",
-                "SQL_TYPE", "INTEGER",
-                "comment", "Informative description of columnN"))
-    ), metadata("comment", "This is super special table with valuable data"));
+    Schema expectedSchema =
+        new Schema(
+            Arrays.asList(
+                field(
+                    "ID",
+                    false,
+                    Types.MinorType.BIGINT.getType(),
+                    metadata(
+                        "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8",
+                        "SQL_SCHEMA_NAME", "PUBLIC",
+                        "SQL_TABLE_NAME", "TABLE1",
+                        "SQL_COLUMN_NAME", "ID",
+                        "SQL_TYPE", "BIGINT",
+                        "comment", "Record identifier")),
+                field(
+                    "NAME",
+                    true,
+                    Types.MinorType.VARCHAR.getType(),
+                    metadata(
+                        "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8",
+                        "SQL_SCHEMA_NAME", "PUBLIC",
+                        "SQL_TABLE_NAME", "TABLE1",
+                        "SQL_COLUMN_NAME", "NAME",
+                        "SQL_TYPE", "CHARACTER VARYING",
+                        "comment", "Name of record")),
+                field(
+                    "COLUMN1",
+                    true,
+                    Types.MinorType.BIT.getType(),
+                    metadata(
+ "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", + "SQL_SCHEMA_NAME", "PUBLIC", + "SQL_TABLE_NAME", "TABLE1", + "SQL_COLUMN_NAME", "COLUMN1", + "SQL_TYPE", "BOOLEAN")), + field( + "COLUMNN", + true, + Types.MinorType.INT.getType(), + metadata( + "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", + "SQL_SCHEMA_NAME", "PUBLIC", + "SQL_TABLE_NAME", "TABLE1", + "SQL_COLUMN_NAME", "COLUMNN", + "SQL_TYPE", "INTEGER", + "comment", "Informative description of columnN"))), + metadata("comment", "This is super special table with valuable data")); assertThat(schema).isEqualTo(expectedSchema); /* corresponding Apache Spark DDL after conversion: - ID BIGINT NOT NULL COMMENT 'Record identifier', - NAME STRING COMMENT 'Name of record', - COLUMN1 BOOLEAN, - COLUMNN INT COMMENT 'Informative description of columnN' - */ + ID BIGINT NOT NULL COMMENT 'Record identifier', + NAME STRING COMMENT 'Name of record', + COLUMN1 BOOLEAN, + COLUMNN INT COMMENT 'Informative description of columnN' + */ assertThat(schema).isEqualTo(expectedSchema); } @@ -160,19 +180,25 @@ private Schema getSchemaWithCommentFromQuery(boolean includeMetadata) throws SQL try (Statement statement = conn.createStatement()) { try (ResultSet resultSet = statement.executeQuery("select * from table1")) { ResultSetMetaData resultSetMetaData = resultSet.getMetaData(); - Map> columnCommentByColumnIndex = getColumnComments(metaData, resultSetMetaData); + Map> columnCommentByColumnIndex = + getColumnComments(metaData, resultSetMetaData); String tableName = getTableNameFromResultSetMetaData(resultSetMetaData); String tableComment = getTableComment(metaData, tableName); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder() - .setAllocator(new RootAllocator()).setSchemaMetadata(Collections.singletonMap(COMMENT, tableComment)) - .setColumnMetadataByColumnIndex(columnCommentByColumnIndex).setIncludeMetadata(includeMetadata).build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder() + .setAllocator(new RootAllocator()) + .setSchemaMetadata(Collections.singletonMap(COMMENT, tableComment)) + .setColumnMetadataByColumnIndex(columnCommentByColumnIndex) + .setIncludeMetadata(includeMetadata) + .build(); return JdbcToArrowUtils.jdbcToArrowSchema(resultSetMetaData, config); } } } - private String getTableNameFromResultSetMetaData(ResultSetMetaData resultSetMetaData) throws SQLException { + private String getTableNameFromResultSetMetaData(ResultSetMetaData resultSetMetaData) + throws SQLException { Set tablesFromQuery = new HashSet<>(); for (int idx = 1, columnCount = resultSetMetaData.getColumnCount(); idx <= columnCount; idx++) { String tableName = resultSetMetaData.getTableName(idx); @@ -186,11 +212,16 @@ private String getTableNameFromResultSetMetaData(ResultSetMetaData resultSetMeta throw new RuntimeException("Table metadata is absent or ambiguous"); } - private Map> getColumnComments(DatabaseMetaData metaData, - ResultSetMetaData resultSetMetaData) throws SQLException { + private Map> getColumnComments( + DatabaseMetaData metaData, ResultSetMetaData resultSetMetaData) throws SQLException { Map> columnCommentByColumnIndex = new HashMap<>(); - for (int columnIdx = 1, columnCount = resultSetMetaData.getColumnCount(); columnIdx <= columnCount; columnIdx++) { - String columnComment = getColumnComment(metaData, resultSetMetaData.getTableName(columnIdx), + for (int columnIdx = 1, columnCount = resultSetMetaData.getColumnCount(); + columnIdx <= columnCount; + columnIdx++) { + String columnComment = + getColumnComment( + 
metaData, + resultSetMetaData.getTableName(columnIdx), resultSetMetaData.getColumnName(columnIdx)); if (columnComment != null && !columnComment.isEmpty()) { columnCommentByColumnIndex.put(columnIdx, Collections.singletonMap(COMMENT, columnComment)); @@ -220,7 +251,8 @@ private String getTableComment(DatabaseMetaData metaData, String tableName) thro throw new RuntimeException("Table comment not found"); } - private String getColumnComment(DatabaseMetaData metaData, String tableName, String columnName) throws SQLException { + private String getColumnComment(DatabaseMetaData metaData, String tableName, String columnName) + throws SQLException { try (ResultSet tableMetadata = metaData.getColumns(null, null, tableName, columnName)) { if (tableMetadata.next()) { return tableMetadata.getString("REMARKS"); @@ -228,9 +260,4 @@ private String getColumnComment(DatabaseMetaData metaData, String tableName, Str } return null; } - - private String getExpectedSchema(String expectedResource) throws java.io.IOException, java.net.URISyntaxException { - return new String(Files.readAllBytes(Paths.get(Objects.requireNonNull( - JdbcToArrowCommentMetadataTest.class.getResource(expectedResource)).toURI())), StandardCharsets.UTF_8); - } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java index 68a681b052cd3..85d6d89d036ff 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
  */
-
 package org.apache.arrow.adapter.jdbc;
 
 import static org.junit.Assert.assertEquals;
@@ -28,7 +27,6 @@
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.TimeZone;
-
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.junit.Test;
@@ -36,7 +34,8 @@
 public class JdbcToArrowConfigTest {
 
   private static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
-  private static final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
+  private static final Calendar calendar =
+      Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
 
   @Test(expected = NullPointerException.class)
   public void testConfigNullArguments() {
@@ -89,8 +88,8 @@ public void testConfig() {
     JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar);
     JdbcToArrowConfig config = builder.build();
 
-    assertTrue(allocator == config.getAllocator());
-    assertTrue(calendar == config.getCalendar());
+    assertEquals(allocator, config.getAllocator());
+    assertEquals(calendar, config.getCalendar());
 
     Calendar newCalendar = Calendar.getInstance();
     BufferAllocator newAllocator = new RootAllocator(Integer.SIZE);
@@ -98,8 +97,8 @@ public void testConfig() {
     builder.setAllocator(newAllocator).setCalendar(newCalendar);
     config = builder.build();
 
-    assertTrue(newAllocator == config.getAllocator());
-    assertTrue(newCalendar == config.getCalendar());
+    assertEquals(newAllocator, config.getAllocator());
+    assertEquals(newCalendar, config.getCalendar());
   }
 
   @Test
@@ -116,13 +115,29 @@ public void testIncludeMetadata() {
     config = new JdbcToArrowConfigBuilder(allocator, calendar, true).build();
     assertTrue(config.shouldIncludeMetadata());
 
-    config = new JdbcToArrowConfig(allocator, calendar, /* include metadata */ true,
-        /* reuse vector schema root */ true, null, null, JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, null);
+    config =
+        new JdbcToArrowConfig(
+            allocator,
+            calendar, /* include metadata */
+            true,
+            /* reuse vector schema root */ true,
+            null,
+            null,
+            JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE,
+            null);
     assertTrue(config.shouldIncludeMetadata());
     assertTrue(config.isReuseVectorSchemaRoot());
 
-    config = new JdbcToArrowConfig(allocator, calendar, /* include metadata */ false,
-        /* reuse vector schema root */ false, null, null, JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, null);
+    config =
+        new JdbcToArrowConfig(
+            allocator,
+            calendar, /* include metadata */
+            false,
+            /* reuse vector schema root */ false,
+            null,
+            null,
+            JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE,
+            null);
     assertFalse(config.shouldIncludeMetadata());
     assertFalse(config.isReuseVectorSchemaRoot());
   }
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java
index 91f2f465dd989..375463d6fd5d4 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
*/ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.assertArrayEquals; @@ -22,8 +21,12 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import java.math.BigDecimal; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.AbstractMap; @@ -31,7 +34,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; @@ -57,12 +59,9 @@ import org.apache.arrow.vector.util.ObjectMapperFactory; import org.apache.arrow.vector.util.Text; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; - /** - * This is a Helper class which has functionalities to read and assert the values from the given FieldVector object. + * This is a Helper class which has functionalities to read and assert the values from the given + * FieldVector object. */ public class JdbcToArrowTestHelper { @@ -78,7 +77,8 @@ public static void assertIntVectorValues(IntVector intVector, int rowCount, Inte } } - public static void assertBooleanVectorValues(BitVector bitVector, int rowCount, Boolean[] values) { + public static void assertBooleanVectorValues( + BitVector bitVector, int rowCount, Boolean[] values) { assertEquals(rowCount, bitVector.getValueCount()); for (int j = 0; j < bitVector.getValueCount(); j++) { @@ -102,7 +102,8 @@ public static void assertBitVectorValues(BitVector bitVector, int rowCount, Inte } } - public static void assertTinyIntVectorValues(TinyIntVector tinyIntVector, int rowCount, Integer[] values) { + public static void assertTinyIntVectorValues( + TinyIntVector tinyIntVector, int rowCount, Integer[] values) { assertEquals(rowCount, tinyIntVector.getValueCount()); for (int j = 0; j < tinyIntVector.getValueCount(); j++) { @@ -114,7 +115,8 @@ public static void assertTinyIntVectorValues(TinyIntVector tinyIntVector, int ro } } - public static void assertSmallIntVectorValues(SmallIntVector smallIntVector, int rowCount, Integer[] values) { + public static void assertSmallIntVectorValues( + SmallIntVector smallIntVector, int rowCount, Integer[] values) { assertEquals(rowCount, smallIntVector.getValueCount()); for (int j = 0; j < smallIntVector.getValueCount(); j++) { @@ -126,7 +128,8 @@ public static void assertSmallIntVectorValues(SmallIntVector smallIntVector, int } } - public static void assertBigIntVectorValues(BigIntVector bigIntVector, int rowCount, Long[] values) { + public static void assertBigIntVectorValues( + BigIntVector bigIntVector, int rowCount, Long[] values) { assertEquals(rowCount, bigIntVector.getValueCount()); for (int j = 0; j < bigIntVector.getValueCount(); j++) { @@ -138,7 +141,8 @@ public static void assertBigIntVectorValues(BigIntVector bigIntVector, int rowCo } } - public static void assertDecimalVectorValues(DecimalVector decimalVector, int rowCount, BigDecimal[] values) { + public static void assertDecimalVectorValues( + DecimalVector decimalVector, int rowCount, BigDecimal[] values) { assertEquals(rowCount, decimalVector.getValueCount()); for (int j = 0; j < decimalVector.getValueCount(); j++) { @@ 
-150,7 +154,8 @@ public static void assertDecimalVectorValues(DecimalVector decimalVector, int ro } } - public static void assertFloat8VectorValues(Float8Vector float8Vector, int rowCount, Double[] values) { + public static void assertFloat8VectorValues( + Float8Vector float8Vector, int rowCount, Double[] values) { assertEquals(rowCount, float8Vector.getValueCount()); for (int j = 0; j < float8Vector.getValueCount(); j++) { @@ -162,7 +167,8 @@ public static void assertFloat8VectorValues(Float8Vector float8Vector, int rowCo } } - public static void assertFloat4VectorValues(Float4Vector float4Vector, int rowCount, Float[] values) { + public static void assertFloat4VectorValues( + Float4Vector float4Vector, int rowCount, Float[] values) { assertEquals(rowCount, float4Vector.getValueCount()); for (int j = 0; j < float4Vector.getValueCount(); j++) { @@ -174,7 +180,8 @@ public static void assertFloat4VectorValues(Float4Vector float4Vector, int rowCo } } - public static void assertTimeVectorValues(TimeMilliVector timeMilliVector, int rowCount, Long[] values) { + public static void assertTimeVectorValues( + TimeMilliVector timeMilliVector, int rowCount, Long[] values) { assertEquals(rowCount, timeMilliVector.getValueCount()); for (int j = 0; j < timeMilliVector.getValueCount(); j++) { @@ -186,7 +193,8 @@ public static void assertTimeVectorValues(TimeMilliVector timeMilliVector, int r } } - public static void assertDateVectorValues(DateDayVector dateDayVector, int rowCount, Integer[] values) { + public static void assertDateVectorValues( + DateDayVector dateDayVector, int rowCount, Integer[] values) { assertEquals(rowCount, dateDayVector.getValueCount()); for (int j = 0; j < dateDayVector.getValueCount(); j++) { @@ -198,7 +206,8 @@ public static void assertDateVectorValues(DateDayVector dateDayVector, int rowCo } } - public static void assertTimeStampVectorValues(TimeStampVector timeStampVector, int rowCount, Long[] values) { + public static void assertTimeStampVectorValues( + TimeStampVector timeStampVector, int rowCount, Long[] values) { assertEquals(rowCount, timeStampVector.getValueCount()); for (int j = 0; j < timeStampVector.getValueCount(); j++) { @@ -210,7 +219,8 @@ public static void assertTimeStampVectorValues(TimeStampVector timeStampVector, } } - public static void assertVarBinaryVectorValues(VarBinaryVector varBinaryVector, int rowCount, byte[][] values) { + public static void assertVarBinaryVectorValues( + VarBinaryVector varBinaryVector, int rowCount, byte[][] values) { assertEquals(rowCount, varBinaryVector.getValueCount()); for (int j = 0; j < varBinaryVector.getValueCount(); j++) { @@ -222,7 +232,8 @@ public static void assertVarBinaryVectorValues(VarBinaryVector varBinaryVector, } } - public static void assertVarcharVectorValues(VarCharVector varCharVector, int rowCount, byte[][] values) { + public static void assertVarcharVectorValues( + VarCharVector varCharVector, int rowCount, byte[][] values) { assertEquals(rowCount, varCharVector.getValueCount()); for (int j = 0; j < varCharVector.getValueCount(); j++) { @@ -238,7 +249,8 @@ public static void assertNullVectorValues(NullVector vector, int rowCount) { assertEquals(rowCount, vector.getValueCount()); } - public static void assertListVectorValues(ListVector listVector, int rowCount, Integer[][] values) { + public static void assertListVectorValues( + ListVector listVector, int rowCount, Integer[][] values) { assertEquals(rowCount, listVector.getValueCount()); for (int j = 0; j < listVector.getValueCount(); j++) { @@ -251,7 
+263,8 @@ public static void assertListVectorValues(ListVector listVector, int rowCount, I } } - public static void assertMapVectorValues(MapVector mapVector, int rowCount, Map<String, String>[] values) { + public static void assertMapVectorValues( + MapVector mapVector, int rowCount, Map<String, String>[] values) { assertEquals(rowCount, mapVector.getValueCount()); for (int j = 0; j < mapVector.getValueCount(); j++) { @@ -262,10 +275,17 @@ public static void assertMapVectorValues(MapVector mapVector, int rowCount, Map< (JsonStringArrayList<Map<String, Text>>) mapVector.getObject(j); Map<String, String> actualMap = null; if (actualSource != null && !actualSource.isEmpty()) { - actualMap = actualSource.stream().map(entry -> - new AbstractMap.SimpleEntry<>(entry.get("key").toString(), - entry.get("value") != null ? entry.get("value").toString() : null)) - .collect(HashMap::new, (collector, val) -> collector.put(val.getKey(), val.getValue()), HashMap::putAll); + actualMap = + actualSource.stream() + .map( + entry -> + new AbstractMap.SimpleEntry<>( + entry.get("key").toString(), + entry.get("value") != null ? entry.get("value").toString() : null)) + .collect( + HashMap::new, + (collector, val) -> collector.put(val.getKey(), val.getValue()), + HashMap::putAll); } assertEquals(values[j], actualMap); } @@ -309,8 +329,8 @@ public static void assertFieldMetadataIsEmpty(VectorSchemaRoot schema) { } } - public static void assertFieldMetadataMatchesResultSetMetadata(ResultSetMetaData rsmd, Schema schema) - throws SQLException { + public static void assertFieldMetadataMatchesResultSetMetadata( + ResultSetMetaData rsmd, Schema schema) throws SQLException { assertNotNull(schema); assertNotNull(schema.getFields()); assertNotNull(rsmd); @@ -399,12 +419,14 @@ public static byte[][] getCharArray(String[] values, String dataType) { byte[][] valueArr = new byte[dataArr.length][]; int i = 0; for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(); + valueArr[i++] = + "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); } return valueArr; } - public static byte[][] getCharArrayWithCharSet(String[] values, String dataType, Charset charSet) { + public static byte[][] getCharArrayWithCharSet( + String[] values, String dataType, Charset charSet) { String[] dataArr = getValues(values, dataType); byte[][] valueArr = new byte[dataArr.length][]; int i = 0; @@ -419,11 +441,13 @@ public static byte[][] getBinaryValues(String[] values, String dataType) { byte[][] valueArr = new byte[dataArr.length][]; int i = 0; for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(); + valueArr[i++] = + "null".equals(data.trim()) ?
null : data.trim().getBytes(StandardCharsets.UTF_8); } return valueArr; } + @SuppressWarnings("StringSplitter") public static String[] getValues(String[] values, String dataType) { String value = ""; for (String val : values) { @@ -440,6 +464,7 @@ public static Integer[][] getListValues(String[] values, String dataType) { return getListValues(dataArr); } + @SuppressWarnings("StringSplitter") public static Integer[][] getListValues(String[] dataArr) { Integer[][] valueArr = new Integer[dataArr.length][]; int i = 0; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java index 4478cdfbee6f7..8dfc684e22f24 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.io.InputStream; @@ -231,8 +230,7 @@ public void setDate(int parameterIndex, Date x, Calendar cal) throws SQLExceptio } @Override - public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException { - } + public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException {} @Override public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws SQLException { @@ -242,8 +240,7 @@ public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws S } @Override - public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException { - } + public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException {} @Override public void setURL(int parameterIndex, URL x) throws SQLException { @@ -261,80 +258,62 @@ public void setRowId(int parameterIndex, RowId x) throws SQLException { } @Override - public void setNString(int parameterIndex, String value) throws SQLException { - } + public void setNString(int parameterIndex, String value) throws SQLException {} @Override public void setNCharacterStream(int parameterIndex, Reader value, long length) - throws SQLException { - } + throws SQLException {} @Override - public void setNClob(int parameterIndex, NClob value) throws SQLException { - } + public void setNClob(int parameterIndex, NClob value) throws SQLException {} @Override - public void setClob(int parameterIndex, Reader reader, long length) throws SQLException { - } + public void setClob(int parameterIndex, Reader reader, long length) throws SQLException {} @Override public void setBlob(int parameterIndex, InputStream inputStream, long length) - throws SQLException { - } + throws SQLException {} @Override - public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException { - } + public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException {} @Override - public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException { - } + public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException {} @Override public void setObject(int parameterIndex, Object x, int targetSqlType, int scaleOrLength) - throws SQLException { - } + throws SQLException {} @Override - public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException { - } + public void setAsciiStream(int parameterIndex, InputStream 
x, long length) throws SQLException {} @Override - public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException { - } + public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException {} @Override public void setCharacterStream(int parameterIndex, Reader reader, long length) - throws SQLException { - } + throws SQLException {} @Override - public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException { - } + public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException {} @Override - public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException { - } + public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException {} @Override - public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException { - } + public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException {} @Override - public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException { - } + public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException {} @Override - public void setClob(int parameterIndex, Reader reader) throws SQLException { - } + public void setClob(int parameterIndex, Reader reader) throws SQLException {} @Override - public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException { - } + public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException {} @Override - public void setNClob(int parameterIndex, Reader reader) throws SQLException { - } + public void setNClob(int parameterIndex, Reader reader) throws SQLException {} @Override public ResultSet executeQuery(String sql) throws SQLException { @@ -347,8 +326,7 @@ public int executeUpdate(String sql) throws SQLException { } @Override - public void close() throws SQLException { - } + public void close() throws SQLException {} @Override public int getMaxFieldSize() throws SQLException { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java index ccc7681c5bc8b..5f5f6dcb98d43 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import java.io.InputStream; @@ -48,13 +47,11 @@ public class ResultSetUtility { public static ResultSet generateEmptyResultSet() throws SQLException { MockDataElement element = new MockDataElement("string_example"); MockResultSetMetaData.MockColumnMetaData columnMetaData = - MockResultSetMetaData.MockColumnMetaData.fromDataElement(element, 1); + MockResultSetMetaData.MockColumnMetaData.fromDataElement(element, 1); ArrayList<MockResultSetMetaData.MockColumnMetaData> cols = new ArrayList<>(); cols.add(columnMetaData); ResultSetMetaData metadata = new MockResultSetMetaData(cols); - return MockResultSet.builder() - .setMetaData(metadata) - .build(); + return MockResultSet.builder().setMetaData(metadata).build(); } public static MockResultSet generateBasicResultSet(int rows) throws SQLException { @@ -66,17 +63,17 @@ public static MockResultSet generateBasicResultSet(int rows) throws SQLException } public static class MockResultSet extends ThrowingResultSet { - private final ArrayList<MockRow> rows; + private final List<MockRow> rows; private int index = 0; private boolean isClosed = false; private ResultSetMetaData metadata; private boolean wasNull; - public MockResultSet(ArrayList<MockRow> rows) throws SQLException { + public MockResultSet(List<MockRow> rows) throws SQLException { this(rows, MockResultSetMetaData.fromRows(rows)); } - public MockResultSet(ArrayList<MockRow> rows, ResultSetMetaData metadata) { + public MockResultSet(List<MockRow> rows, ResultSetMetaData metadata) { this.rows = rows; this.metadata = metadata; this.wasNull = false; @@ -252,8 +249,8 @@ public Builder addDataElement(Object val, int sqlType) { return this.addDataElement(new MockDataElement(val, sqlType)); } - public Builder setMetaData(ResultSetMetaData metaData) { - this.metadata = metaData; + public Builder setMetaData(ResultSetMetaData metadata) { + this.metadata = metadata; return this; } @@ -318,16 +315,20 @@ public String getColumnTypeName(int column) throws SQLException { return columns.get(column - 1).getTypeName(); } - public static MockResultSetMetaData fromRows(ArrayList<MockRow> rows) throws SQLException { - // Note: This attempts to dynamically construct ResultSetMetaData from the first row in a given result set. - // If there are now rows, or the result set contains no columns, this cannot be dynamically generated and + public static MockResultSetMetaData fromRows(List<MockRow> rows) throws SQLException { + // Note: This attempts to dynamically construct ResultSetMetaData from the first row in a + // given result set. + // If there are no rows, or the result set contains no columns, this cannot be dynamically + // generated and // an exception will be thrown.
if (rows.size() == 0) { - throw new SQLException("Unable to dynamically generate ResultSetMetaData because row count is zero!"); + throw new SQLException( + "Unable to dynamically generate ResultSetMetaData because row count is zero!"); } MockRow firstRow = rows.get(0); if (firstRow.dataElements.size() == 0) { - throw new SQLException("Unable to dynamically generate ResultSetMetaData because column count is zero!"); + throw new SQLException( + "Unable to dynamically generate ResultSetMetaData because column count is zero!"); } ArrayList<MockColumnMetaData> columns = new ArrayList<>(); for (int i = 0; i < firstRow.dataElements.size(); i++) { @@ -338,7 +339,6 @@ public static MockResultSetMetaData fromRows(ArrayList<MockRow> rows) throws SQL } public static class MockColumnMetaData { - private int index; private int sqlType; private int precision; private int scale; @@ -347,9 +347,7 @@ public static class MockColumnMetaData { private String typeName; private int displaySize; - - private MockColumnMetaData() { - } + private MockColumnMetaData() {} private String getLabel() { return label; @@ -383,17 +381,17 @@ private int getDisplaySize() { return displaySize; } - public static MockColumnMetaData fromDataElement(MockDataElement element, int i) throws SQLException { + public static MockColumnMetaData fromDataElement(MockDataElement element, int i) + throws SQLException { return MockColumnMetaData.builder() - .index(i) - .sqlType(element.getSqlType()) - .precision(element.getPrecision()) - .scale(element.getScale()) - .nullable(element.isNullable()) - .setTypeName("TYPE") - .setDisplaySize(420) - .label("col_" + i) - .build(); + .sqlType(element.getSqlType()) + .precision(element.getPrecision()) + .scale(element.getScale()) + .nullable(element.isNullable()) + .setTypeName("TYPE") + .setDisplaySize(420) + .label("col_" + i) + .build(); } public static Builder builder() { @@ -403,11 +401,6 @@ public static Builder builder() { public static class Builder { private MockColumnMetaData columnMetaData = new MockColumnMetaData(); - public Builder index(int index) { - this.columnMetaData.index = index; - return this; - } - public Builder label(String label) { this.columnMetaData.label = label; return this; @@ -447,15 +440,13 @@ public MockColumnMetaData build() { return this.columnMetaData; } } - } - } public static class MockRow { - private final ArrayList<MockDataElement> dataElements; + private final List<MockDataElement> dataElements; - public MockRow(ArrayList<MockDataElement> elements) { + public MockRow(List<MockDataElement> elements) { this.dataElements = elements; } @@ -642,7 +633,6 @@ public short getShort() throws SQLException { } } - public static class ThrowingResultSet implements ResultSet { @Override @@ -1146,17 +1136,20 @@ public void updateTimestamp(String columnLabel, Timestamp x) throws SQLException } @Override - public void updateAsciiStream(String columnLabel, InputStream x, int length) throws SQLException { + public void updateAsciiStream(String columnLabel, InputStream x, int length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBinaryStream(String columnLabel, InputStream x, int length) throws SQLException { + public void updateBinaryStream(String columnLabel, InputStream x, int length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateCharacterStream(String columnLabel, Reader reader, int length) throws SQLException { + public void updateCharacterStream(String columnLabel, Reader reader, int length) + throws SQLException { throw getExceptionToThrow(); } @@ -1446,7 +1439,8 @@ public void
updateNCharacterStream(int columnIndex, Reader x, long length) throw } @Override - public void updateNCharacterStream(String columnLabel, Reader reader, long length) throws SQLException { + public void updateNCharacterStream(String columnLabel, Reader reader, long length) + throws SQLException { throw getExceptionToThrow(); } @@ -1456,7 +1450,8 @@ public void updateAsciiStream(int columnIndex, InputStream x, long length) throw } @Override - public void updateBinaryStream(int columnIndex, InputStream x, long length) throws SQLException { + public void updateBinaryStream(int columnIndex, InputStream x, long length) + throws SQLException { throw getExceptionToThrow(); } @@ -1466,27 +1461,32 @@ public void updateCharacterStream(int columnIndex, Reader x, long length) throws } @Override - public void updateAsciiStream(String columnLabel, InputStream x, long length) throws SQLException { + public void updateAsciiStream(String columnLabel, InputStream x, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBinaryStream(String columnLabel, InputStream x, long length) throws SQLException { + public void updateBinaryStream(String columnLabel, InputStream x, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateCharacterStream(String columnLabel, Reader reader, long length) throws SQLException { + public void updateCharacterStream(String columnLabel, Reader reader, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBlob(int columnIndex, InputStream inputStream, long length) throws SQLException { + public void updateBlob(int columnIndex, InputStream inputStream, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBlob(String columnLabel, InputStream inputStream, long length) throws SQLException { + public void updateBlob(String columnLabel, InputStream inputStream, long length) + throws SQLException { throw getExceptionToThrow(); } @@ -1591,13 +1591,14 @@ public <T> T getObject(String columnLabel, Class<T> type) throws SQLException { } @Override - public void updateObject(int columnIndex, Object x, SQLType targetSqlType, int scaleOrLength) throws SQLException { + public void updateObject(int columnIndex, Object x, SQLType targetSqlType, int scaleOrLength) + throws SQLException { throw getExceptionToThrow(); } @Override public void updateObject(String columnLabel, Object x, SQLType targetSqlType, int scaleOrLength) - throws SQLException { + throws SQLException { throw getExceptionToThrow(); } @@ -1607,7 +1608,8 @@ public void updateObject(int columnIndex, Object x, SQLType targetSqlType) throw } @Override - public void updateObject(String columnLabel, Object x, SQLType targetSqlType) throws SQLException { + public void updateObject(String columnLabel, Object x, SQLType targetSqlType) + throws SQLException { throw getExceptionToThrow(); } @@ -1630,7 +1632,6 @@ private static SQLException getExceptionToThrow(String message) { return new SQLException(message); } - public static class ThrowingResultSetMetaData implements ResultSetMetaData { @Override public int getColumnCount() throws SQLException { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java index 2424ed625248d..8af2c06f4de54 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java +++
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.assertEquals; @@ -26,7 +25,6 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VectorSchemaRoot; @@ -36,20 +34,24 @@ public class ResultSetUtilityTest { @Test public void testZeroRowResultSet() throws Exception { - for (boolean reuseVectorSchemaRoot : new boolean[]{false, true}) { + for (boolean reuseVectorSchemaRoot : new boolean[] {false, true}) { try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) { ResultSet rs = ResultSetUtility.generateEmptyResultSet(); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .build(); ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config); assertTrue("Iterator on zero row ResultSet should haveNext() before use", iter.hasNext()); VectorSchemaRoot root = iter.next(); assertNotNull("VectorSchemaRoot from first next() result should never be null", root); - assertEquals("VectorSchemaRoot from empty ResultSet should have zero rows", 0, root.getRowCount()); - assertFalse("hasNext() should return false on empty ResultSets after initial next() call", iter.hasNext()); + assertEquals( + "VectorSchemaRoot from empty ResultSet should have zero rows", 0, root.getRowCount()); + assertFalse( + "hasNext() should return false on empty ResultSets after initial next() call", + iter.hasNext()); } } } @@ -99,7 +101,8 @@ public void testBasicResultSet() throws Exception { @Test public void testMockDataTypes() throws SQLException { - ResultSetUtility.MockDataElement element = new ResultSetUtility.MockDataElement(1L, Types.NUMERIC); + ResultSetUtility.MockDataElement element = + new ResultSetUtility.MockDataElement(1L, Types.NUMERIC); assertEquals(1L, element.getLong()); assertEquals(1, element.getInt()); assertEquals("1", element.getString()); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java index 50c4fe6db2a14..7fa8188a99158 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -/** - * POJO to handle the YAML data from the test YAML file. - */ +/** POJO to handle the YAML data from the test YAML file. 
*/ @JsonIgnoreProperties(ignoreUnknown = true) public class Table { private String name; @@ -39,8 +35,7 @@ public class Table { private String[] vectors; private int rowCount; - public Table() { - } + public Table() {} public String getName() { return name; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java index 3eb886faabc10..93ba028e39629 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.assertEquals; @@ -34,7 +33,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -48,9 +46,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test options for dealing with unreliable ResultSetMetaData from JDBC drivers. - */ +/** Test options for dealing with unreliable ResultSetMetaData from JDBC drivers. */ @RunWith(Parameterized.class) public class UnreliableMetaDataTest { private final boolean reuseVectorSchemaRoot; @@ -72,7 +68,7 @@ public void afterEach() { @Parameterized.Parameters(name = "reuseVectorSchemaRoot = {0}") public static Collection<Object[]> getTestData() { - return Arrays.asList(new Object[][] { {false}, {true} }); + return Arrays.asList(new Object[][] {{false}, {true}}); } @Test @@ -91,13 +87,15 @@ public void testUnreliableMetaDataPrecisionAndScale() throws Exception { // reset the ResultSet: rs.beforeFirst(); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); - assertThrows(RuntimeException.class, iter::next, "Expected to fail due to mismatched metadata!"); + assertThrows( + RuntimeException.class, iter::next, "Expected to fail due to mismatched metadata!"); } // reset the ResultSet: @@ -105,11 +103,12 @@ public void testUnreliableMetaDataPrecisionAndScale() throws Exception { JdbcFieldInfo explicitMappingField = new JdbcFieldInfo(Types.DECIMAL, 18, 2); Map<Integer, JdbcFieldInfo> explicitMapping = new HashMap<>(); explicitMapping.put(1, explicitMappingField); - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(explicitMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { while (iter.hasNext()) { @@ -133,7 +132,8 @@ public void
testInconsistentPrecisionAndScale() throws Exception { assertEquals("Value precision should be 18", 18, bd1.precision()); rs.next(); BigDecimal bd2 = rs.getBigDecimal(1); - assertEquals("Value should be 1000000000300.0000001", new BigDecimal("1000000000300.0000001"), bd2); + assertEquals( + "Value should be 1000000000300.0000001", new BigDecimal("1000000000300.0000001"), bd2); assertEquals("Value scale should be 7", 7, bd2.scale()); assertEquals("Value precision should be 20", 20, bd2.precision()); rs.beforeFirst(); @@ -141,23 +141,27 @@ public void testInconsistentPrecisionAndScale() throws Exception { Map<Integer, JdbcFieldInfo> explicitMapping = new HashMap<>(); explicitMapping.put(1, explicitMappingField); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(explicitMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); - assertThrows(RuntimeException.class, iter::next, + assertThrows( + RuntimeException.class, + iter::next, "This is expected to fail due to inconsistent BigDecimal scales, while strict matching is enabled."); } // Reuse same ResultSet, with RoundingMode.UNNECESSARY set to coerce BigDecimal scale as needed: - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .setBigDecimalRoundingMode(RoundingMode.UNNECESSARY) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(explicitMapping) + .setBigDecimalRoundingMode(RoundingMode.UNNECESSARY) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { while (iter.hasNext()) { VectorSchemaRoot root = iter.next(); @@ -171,27 +175,32 @@ public void testIncorrectNullability() throws Exception { // ARROW-17005: ResultSetMetaData may indicate a field is non-nullable even when there are nulls ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData = ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder() - .index(1) .sqlType(Types.INTEGER) .nullable(ResultSetMetaData.columnNoNulls) .build(); - ResultSetMetaData metadata = new ResultSetUtility.MockResultSetMetaData(Collections.singletonList(columnMetaData)); - final ResultSetUtility.MockResultSet.Builder resultSetBuilder = ResultSetUtility.MockResultSet.builder() - .setMetaData(metadata) - .addDataElement(new ResultSetUtility.MockDataElement(1024, Types.INTEGER)) - .finishRow() - .addDataElement(new ResultSetUtility.MockDataElement(null, Types.INTEGER)) - .finishRow(); - final Schema notNullSchema = new Schema( - Collections.singletonList(Field.notNullable(/*name=*/null, new ArrowType.Int(32, true)))); - final Schema nullSchema = new Schema( - Collections.singletonList(Field.nullable(/*name=*/null, new ArrowType.Int(32, true)))); + ResultSetMetaData metadata = + new
ResultSetUtility.MockResultSetMetaData(Collections.singletonList(columnMetaData)); + final ResultSetUtility.MockResultSet.Builder resultSetBuilder = + ResultSetUtility.MockResultSet.builder() + .setMetaData(metadata) + .addDataElement(new ResultSetUtility.MockDataElement(1024, Types.INTEGER)) + .finishRow() + .addDataElement(new ResultSetUtility.MockDataElement(null, Types.INTEGER)) + .finishRow(); + final Schema notNullSchema = + new Schema( + Collections.singletonList( + Field.notNullable(/*name=*/ null, new ArrowType.Int(32, true)))); + final Schema nullSchema = + new Schema( + Collections.singletonList(Field.nullable(/*name=*/ null, new ArrowType.Int(32, true)))); try (final ResultSet rs = resultSetBuilder.build()) { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); final VectorSchemaRoot root = iter.next(); @@ -209,14 +218,16 @@ public void testIncorrectNullability() throws Exception { // Override the nullability to get the correct result final Map<Integer, JdbcFieldInfo> typeMapping = new HashMap<>(); - JdbcFieldInfo realFieldInfo = new JdbcFieldInfo( - Types.INTEGER, ResultSetMetaData.columnNullable, /*precision*/0, /*scale*/0); + JdbcFieldInfo realFieldInfo = + new JdbcFieldInfo( + Types.INTEGER, ResultSetMetaData.columnNullable, /*precision*/ 0, /*scale*/ 0); typeMapping.put(1, realFieldInfo); - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(typeMapping) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(typeMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); final VectorSchemaRoot root = iter.next(); @@ -232,14 +243,16 @@ public void testIncorrectNullability() throws Exception { rs.beforeFirst(); // columnNullableUnknown won't override the metadata - realFieldInfo = new JdbcFieldInfo( - Types.INTEGER, ResultSetMetaData.columnNullableUnknown, /*precision*/0, /*scale*/0); + realFieldInfo = + new JdbcFieldInfo( + Types.INTEGER, ResultSetMetaData.columnNullableUnknown, /*precision*/ 0, /*scale*/ 0); typeMapping.put(1, realFieldInfo); - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(typeMapping) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(typeMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); final VectorSchemaRoot root = iter.next(); @@ -257,7 +270,6 @@ public void testIncorrectNullability() throws Exception { private ResultSet
buildIncorrectPrecisionAndScaleMetaDataResultSet() throws SQLException { ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData = ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder() - .index(1) .sqlType(Types.DECIMAL) .precision(0) .scale(0) @@ -268,8 +280,8 @@ private ResultSet buildIncorrectPrecisionAndScaleMetaDataResultSet() throws SQLE return ResultSetUtility.MockResultSet.builder() .setMetaData(metadata) .addDataElement( - new ResultSetUtility.MockDataElement(new BigDecimal("1000000000000000.01"), Types.DECIMAL) - ) + new ResultSetUtility.MockDataElement( + new BigDecimal("1000000000000000.01"), Types.DECIMAL)) .finishRow() .build(); } @@ -277,7 +289,6 @@ private ResultSet buildIncorrectPrecisionAndScaleMetaDataResultSet() throws SQLE private ResultSet buildVaryingPrecisionAndScaleResultSet() throws SQLException { ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData = ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder() - .index(1) .sqlType(Types.DECIMAL) .precision(0) .scale(0) @@ -288,12 +299,12 @@ private ResultSet buildVaryingPrecisionAndScaleResultSet() throws SQLException { return ResultSetUtility.MockResultSet.builder() .setMetaData(metadata) .addDataElement( - new ResultSetUtility.MockDataElement(new BigDecimal("1000000000000000.01"), Types.DECIMAL) - ) + new ResultSetUtility.MockDataElement( + new BigDecimal("1000000000000000.01"), Types.DECIMAL)) .finishRow() .addDataElement( - new ResultSetUtility.MockDataElement(new BigDecimal("1000000000300.0000001"), Types.DECIMAL) - ) + new ResultSetUtility.MockDataElement( + new BigDecimal("1000000000300.0000001"), Types.DECIMAL)) .finishRow() .build(); } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java index 96bac42214cef..6a25c58fbde7e 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import org.apache.arrow.memory.BufferAllocator; @@ -35,5 +34,4 @@ public void setUp() { public void tearDown() { allocator.close(); } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java index a368023d49005..255770ecdbf6d 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.consumer; import static org.junit.Assert.assertArrayEquals; @@ -23,7 +22,6 @@ import java.io.ByteArrayInputStream; import java.io.IOException; - import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.VarBinaryVector; import org.junit.Test; @@ -37,7 +35,8 @@ interface InputStreamConsumer { void consume(BinaryConsumer consumer) throws IOException; } - protected void assertConsume(boolean nullable, InputStreamConsumer dataConsumer, byte[][] expect) throws IOException { + protected void assertConsume(boolean nullable, InputStreamConsumer dataConsumer, byte[][] expect) + throws IOException { try (final VarBinaryVector vector = new VarBinaryVector("binary", allocator)) { BinaryConsumer consumer = BinaryConsumer.createConsumer(vector, 0, nullable); dataConsumer.consume(consumer); @@ -61,51 +60,59 @@ private byte[] createBytes(int length) { return bytes; } - public void testConsumeInputStream(byte[][] values, boolean nullable) throws IOException { - assertConsume(nullable, binaryConsumer -> { - for (byte[] value : values) { - binaryConsumer.consume(new ByteArrayInputStream(value)); - binaryConsumer.moveWriterPosition(); - } - }, values); + assertConsume( + nullable, + binaryConsumer -> { + for (byte[] value : values) { + binaryConsumer.consume(new ByteArrayInputStream(value)); + binaryConsumer.moveWriterPosition(); + } + }, + values); } @Test public void testConsumeInputStream() throws IOException { - testConsumeInputStream(new byte[][]{ - createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(DEFAULT_RECORD_BYTE_COUNT * 2), - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT * 10), - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, false); + testConsumeInputStream(new byte[][] {createBytes(DEFAULT_RECORD_BYTE_COUNT)}, false); + + testConsumeInputStream( + new byte[][] { + createBytes(DEFAULT_RECORD_BYTE_COUNT), createBytes(DEFAULT_RECORD_BYTE_COUNT) + }, + false); + + testConsumeInputStream( + new byte[][] { + createBytes(DEFAULT_RECORD_BYTE_COUNT * 2), + createBytes(DEFAULT_RECORD_BYTE_COUNT), + createBytes(DEFAULT_RECORD_BYTE_COUNT) + }, + false); + + testConsumeInputStream( + new byte[][] {createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT)}, false); + + testConsumeInputStream( + new byte[][] { + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT * 10), + }, + false); + + testConsumeInputStream( + new byte[][] { + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) + }, + false); + + testConsumeInputStream( + new byte[][] { + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), + 
createBytes(DEFAULT_RECORD_BYTE_COUNT), + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) + }, + false); byte[][] testRecords = new byte[INITIAL_VALUE_ALLOCATION * 2][]; for (int i = 0; i < testRecords.length; i++) { @@ -113,5 +120,4 @@ public void testConsumeInputStream() throws IOException { } testConsumeInputStream(testRecords, false); } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java index d9acfe88f4f8b..e22686e890580 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow; @@ -28,7 +27,6 @@ import java.sql.SQLException; import java.sql.Statement; import java.util.List; - import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.Field; @@ -40,10 +38,8 @@ public class JdbcAliasToArrowTest { private Connection conn = null; - private static final String CREATE_STATEMENT = - "CREATE TABLE example_table (id INTEGER);"; - private static final String INSERT_STATEMENT = - "INSERT INTO example_table (id) VALUES (?);"; + private static final String CREATE_STATEMENT = "CREATE TABLE example_table (id INTEGER);"; + private static final String INSERT_STATEMENT = "INSERT INTO example_table (id) VALUES (?);"; private static final String QUERY = "SELECT id as a, id as b FROM example_table;"; private static final String DROP_STATEMENT = "DROP TABLE example_table;"; private static final String ORIGINAL_COLUMN_NAME = "ID"; @@ -62,10 +58,9 @@ public void setUp() throws Exception { } /** - * Test h2 database query with alias for column name and column label. - * To verify reading field alias from an H2 database works as expected. - * If this test fails, something is either wrong with the setup, - * or the H2 SQL behavior changed. + * Test h2 database query with alias for column name and column label, to verify that reading a + * field alias from an H2 database works as expected. If this test fails, something is either + * wrong with the setup, or the H2 SQL behavior changed. */ @Test public void testReadH2Alias() throws Exception { @@ -96,8 +91,8 @@ public void testReadH2Alias() throws Exception { } /** - * Test jdbc query results with alias to arrow works expected. - * Arrow result schema name should be field alias name. + * Test that jdbc query results with an alias convert to Arrow as expected. The Arrow result + * schema name should be the field alias name.
*/ @Test public void testJdbcAliasToArrow() throws Exception { @@ -105,14 +100,13 @@ public void testJdbcAliasToArrow() throws Exception { insertRows(rowCount); try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { - final VectorSchemaRoot vector = - sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE)); + final VectorSchemaRoot vector = sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE)); assertEquals(rowCount, vector.getRowCount()); Schema vectorSchema = vector.getSchema(); List<Field> vectorFields = vectorSchema.getFields(); - assertEquals(vectorFields.get(0).getName(), COLUMN_A); - assertEquals(vectorFields.get(1).getName(), COLUMN_B); + assertEquals(COLUMN_A, vectorFields.get(0).getName()); + assertEquals(COLUMN_B, vectorFields.get(1).getName()); } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java index 377e332b43a13..895dab52ca534 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow; @@ -22,6 +21,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import java.nio.charset.StandardCharsets; import java.sql.Array; import java.sql.Connection; import java.sql.DriverManager; @@ -33,7 +33,6 @@ import java.sql.Types; import java.util.HashMap; import java.util.Map; - import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -53,11 +52,12 @@ public class JdbcToArrowArrayTest { private Connection conn = null; private static final String CREATE_STATEMENT = - "CREATE TABLE array_table (id INTEGER, int_array INTEGER ARRAY, float_array REAL ARRAY, " + - "string_array VARCHAR ARRAY);"; + "CREATE TABLE array_table (id INTEGER, int_array INTEGER ARRAY, float_array REAL ARRAY, " + + "string_array VARCHAR ARRAY);"; private static final String INSERT_STATEMENT = "INSERT INTO array_table (id, int_array, float_array, string_array) VALUES (?, ?, ?, ?);"; - private static final String QUERY = "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;"; + private static final String QUERY = + "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;"; private static final String DROP_STATEMENT = "DROP TABLE array_table;"; private static Map<String, JdbcFieldInfo> arrayFieldMapping; @@ -157,7 +157,8 @@ public void testJdbcToArrow() throws Exception { insertRows(rowCount, intArrays, floatArrays, strArrays); final JdbcToArrowConfigBuilder builder = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); final JdbcToArrowConfig config = builder.build(); @@ -167,9 +168,12 @@ public void testJdbcToArrow() throws Exception { assertEquals(rowCount, vector.getRowCount()); - assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount,
intArrays); - assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); - assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays); + assertIntegerVectorEquals( + (ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); + assertFloatVectorEquals( + (ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); + assertStringVectorEquals( + (ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays); } } @@ -178,30 +182,22 @@ public void testJdbcToArrowWithNulls() throws Exception { int rowCount = 4; Integer[][] intArrays = { - null, - {0}, - {1}, - {}, + null, {0}, {1}, {}, }; Float[][] floatArrays = { - { 2.0f }, - null, - { 3.0f }, - {}, + {2.0f}, null, {3.0f}, {}, }; String[][] stringArrays = { - {"4"}, - null, - {"5"}, - {}, + {"4"}, null, {"5"}, {}, }; insertRows(rowCount, intArrays, floatArrays, stringArrays); final JdbcToArrowConfigBuilder builder = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); final JdbcToArrowConfig config = builder.build(); @@ -211,13 +207,17 @@ public void testJdbcToArrowWithNulls() throws Exception { assertEquals(rowCount, vector.getRowCount()); - assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); - assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); - assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, stringArrays); + assertIntegerVectorEquals( + (ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); + assertFloatVectorEquals( + (ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); + assertStringVectorEquals( + (ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, stringArrays); } } - private void assertIntegerVectorEquals(ListVector listVector, int rowCount, Integer[][] expectedValues) { + private void assertIntegerVectorEquals( + ListVector listVector, int rowCount, Integer[][] expectedValues) { IntVector vector = (IntVector) listVector.getDataVector(); ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); @@ -242,7 +242,8 @@ private void assertIntegerVectorEquals(ListVector listVector, int rowCount, Inte } } - private void assertFloatVectorEquals(ListVector listVector, int rowCount, Float[][] expectedValues) { + private void assertFloatVectorEquals( + ListVector listVector, int rowCount, Float[][] expectedValues) { Float4Vector vector = (Float4Vector) listVector.getDataVector(); ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); @@ -267,7 +268,8 @@ private void assertFloatVectorEquals(ListVector listVector, int rowCount, Float[ } } - private void assertStringVectorEquals(ListVector listVector, int rowCount, String[][] expectedValues) { + private void assertStringVectorEquals( + ListVector listVector, int rowCount, String[][] expectedValues) { VarCharVector vector = (VarCharVector) listVector.getDataVector(); ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); @@ -284,7 +286,8 @@ private void assertStringVectorEquals(ListVector listVector, int rowCount, Strin assertEquals(1, listVector.isSet(row)); assertEquals(expectedValues[row].length, offset - prevOffset); for (int i = prevOffset; 
i < offset; ++i) { - assertArrayEquals(expectedValues[row][i - prevOffset].getBytes(), vector.get(i)); + assertArrayEquals( + expectedValues[row][i - prevOffset].getBytes(StandardCharsets.UTF_8), vector.get(i)); } prevOffset = offset; @@ -308,7 +311,7 @@ private Integer[][] generateIntegerArrayField(int numRows) { for (int i = 0; i < numRows; ++i) { int val = i * 4; - result[i] = new Integer[]{val, val + 1, val + 2, val + 3}; + result[i] = new Integer[] {val, val + 1, val + 2, val + 3}; } return result; @@ -316,10 +319,10 @@ private Integer[][] generateIntegerArrayField(int numRows) { private Float[][] generateFloatArrayField(int numRows) { Float[][] result = new Float[numRows][]; - + for (int i = 0; i < numRows; ++i) { int val = i * 4; - result[i] = new Float[]{(float) val, (float) val + 1, (float) val + 2, (float) val + 3}; + result[i] = new Float[] {(float) val, (float) val + 1, (float) val + 2, (float) val + 3}; } return result; @@ -330,22 +333,21 @@ private String[][] generateStringArrayField(int numRows) { for (int i = 0; i < numRows; ++i) { int val = i * 4; - result[i] = new String[]{ - String.valueOf(val), - String.valueOf(val + 1), - String.valueOf(val + 2), - String.valueOf(val + 3) }; + result[i] = + new String[] { + String.valueOf(val), + String.valueOf(val + 1), + String.valueOf(val + 2), + String.valueOf(val + 3) + }; } return result; } private void insertRows( - int numRows, - Integer[][] integerArrays, - Float[][] floatArrays, - String[][] strArrays) - throws SQLException { + int numRows, Integer[][] integerArrays, Float[][] floatArrays, String[][] strArrays) + throws SQLException { // Insert 4 Rows try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java index 422b55070aaf9..14de2d6dc8f3c 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; @@ -29,7 +28,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -47,8 +45,8 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with UTF-8 Charset, - * including the multi-byte CJK characters for H2 database. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * UTF-8 Charset, including the multi-byte CJK characters for H2 database. 
*/ @RunWith(Parameterized.class) public class JdbcToArrowCharSetTest extends AbstractJdbcToArrowTest { @@ -76,12 +74,13 @@ public JdbcToArrowCharSetTest(Table table) { * @throws ClassNotFoundException on error */ @Before + @Override public void setUp() throws SQLException, ClassNotFoundException { String url = "jdbc:h2:mem:JdbcToArrowTest?characterEncoding=UTF-8"; String driver = "org.h2.Driver"; Class.forName(driver); conn = DriverManager.getConnection(url); - try (Statement stmt = conn.createStatement();) { + try (Statement stmt = conn.createStatement(); ) { stmt.executeUpdate(table.getCreate()); for (String insert : table.getData()) { stmt.executeUpdate(insert); @@ -98,38 +97,59 @@ public void setUp() throws SQLException, ClassNotFoundException { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowCharSetTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with UTF-8 Charset, including - * the multi-byte CJK characters. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with UTF-8 + * Charset, including the multi-byte CJK characters. */ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - Calendar.getInstance()), false); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()), false); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .build()), + false); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .build()), + false); } @Test public void testJdbcSchemaMetadata() throws SQLException { - 
JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -139,19 +159,26 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CLOB), + table.getRowCount(), getCharArrayWithCharSet(table.getValues(), CLOB, StandardCharsets.UTF_8)); - assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(VARCHAR), + table.getRowCount(), getCharArrayWithCharSet(table.getValues(), VARCHAR, StandardCharsets.UTF_8)); - assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CHAR), + table.getRowCount(), getCharArrayWithCharSet(table.getValues(), CHAR, StandardCharsets.UTF_8)); } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java index ae4fffd0f94f0..d7c4be03b3542 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues; @@ -40,7 +39,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -71,8 +69,8 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with various data types - * for H2 database using multiple test data files. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * various data types for H2 database using multiple test data files. 
*/ @RunWith(Parameterized.class) public class JdbcToArrowDataTypesTest extends AbstractJdbcToArrowTest { @@ -137,42 +135,60 @@ public JdbcToArrowDataTypesTest(Table table) { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowDataTypesTest.class)); } - /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes. - */ + /** Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes. */ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), false); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build()), false); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build()), + false); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build()), + false); } @Test public void testJdbcSchemaMetadata() throws SQLException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ResultSetMetaData rsmd = 
conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -182,77 +198,108 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); switch (table.getType()) { case BIGINT: - assertBigIntVectorValues((BigIntVector) root.getVector(table.getVector()), table.getValues().length, + assertBigIntVectorValues( + (BigIntVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case BINARY: case BLOB: - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(table.getVector()), table.getValues().length, + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(table.getVector()), + table.getValues().length, table.getBinaryValues()); break; case BIT: - assertBitVectorValues((BitVector) root.getVector(table.getVector()), table.getValues().length, + assertBitVectorValues( + (BitVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case BOOL: - assertBooleanVectorValues((BitVector) root.getVector(table.getVector()), table.getValues().length, + assertBooleanVectorValues( + (BitVector) root.getVector(table.getVector()), + table.getValues().length, table.getBoolValues()); break; case CHAR: case VARCHAR: case CLOB: - assertVarcharVectorValues((VarCharVector) root.getVector(table.getVector()), table.getValues().length, + assertVarcharVectorValues( + (VarCharVector) root.getVector(table.getVector()), + table.getValues().length, table.getCharValues()); break; case DATE: - assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length, + assertDateVectorValues( + (DateDayVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case TIME: - assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeVectorValues( + (TimeMilliVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case TIMESTAMP: - assertTimeStampVectorValues((TimeStampVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case DECIMAL: - assertDecimalVectorValues((DecimalVector) root.getVector(table.getVector()), table.getValues().length, + assertDecimalVectorValues( + (DecimalVector) root.getVector(table.getVector()), + table.getValues().length, table.getBigDecimalValues()); break; case DOUBLE: - assertFloat8VectorValues((Float8Vector) root.getVector(table.getVector()), table.getValues().length, + assertFloat8VectorValues( + (Float8Vector) root.getVector(table.getVector()), + table.getValues().length, table.getDoubleValues()); break; case INT: - assertIntVectorValues((IntVector) root.getVector(table.getVector()), 
table.getValues().length, + assertIntVectorValues( + (IntVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case SMALLINT: - assertSmallIntVectorValues((SmallIntVector) root.getVector(table.getVector()), table.getValues().length, + assertSmallIntVectorValues( + (SmallIntVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case TINYINT: - assertTinyIntVectorValues((TinyIntVector) root.getVector(table.getVector()), table.getValues().length, + assertTinyIntVectorValues( + (TinyIntVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case REAL: - assertFloat4VectorValues((Float4Vector) root.getVector(table.getVector()), table.getValues().length, + assertFloat4VectorValues( + (Float4Vector) root.getVector(table.getVector()), + table.getValues().length, table.getFloatValues()); break; case NULL: assertNullVectorValues((NullVector) root.getVector(table.getVector()), table.getRowCount()); break; case LIST: - assertListVectorValues((ListVector) root.getVector(table.getVector()), table.getValues().length, + assertListVectorValues( + (ListVector) root.getVector(table.getVector()), + table.getValues().length, table.getListValues()); break; default: @@ -261,4 +308,3 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { } } } - diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java index 43862a93c39c9..8bb3812637acb 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertMapVectorValues; @@ -24,7 +23,6 @@ import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Calendar; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; import org.apache.arrow.memory.RootAllocator; @@ -32,44 +30,48 @@ import org.apache.arrow.vector.complex.MapVector; import org.junit.Test; -/** - * Test MapConsumer with OTHER jdbc type. - */ +/** Test MapConsumer with OTHER jdbc type. 
 */
 public class JdbcToArrowMapDataTypeTest extends AbstractJdbcToArrowTest {

   public JdbcToArrowMapDataTypeTest() throws IOException {
     this.table = getTable("h2/test1_map_h2.yml", JdbcToArrowMapDataTypeTest.class);
   }

-  /**
-   * Test Method to test JdbcToArrow Functionality for Map form Types.OTHER column
-   */
+  /** Test Method to test JdbcToArrow Functionality for Map from Types.OTHER column */
   @Test
+  @Override
   public void testJdbcToArrowValues() throws SQLException, IOException {
     Calendar calendar = Calendar.getInstance();
     ResultSetMetaData rsmd = getQueryMetaData(table.getQuery());
-    testDataSets(sqlToArrow(
+    testDataSets(
+        sqlToArrow(
             conn.createStatement().executeQuery(table.getQuery()),
-        new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())
-            .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd))
-            .build()), true);
-    testDataSets(sqlToArrow(
+            new JdbcToArrowConfigBuilder(
+                new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())
+                .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd))
+                .build()),
+        true);
+    testDataSets(
+        sqlToArrow(
             conn,
             table.getQuery(),
-        new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())
-            .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd))
-            .build()), true);
+            new JdbcToArrowConfigBuilder(
+                new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())
+                .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd))
+                .build()),
+        true);
   }

   /**
    * This method calls the assert methods for various DataSets.
    *
    * @param root VectorSchemaRoot for test
-   * @param isIncludeMapVector is this dataset checks includes map column.
-   *     Jdbc type to 'map' mapping declared in configuration only manually
+   * @param isIncludeMapVector whether the checks for this dataset include the map column; the JDBC
+   *     type to 'map' mapping can only be declared manually in the configuration
    */
+  @Override
   public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) {
-    assertMapVectorValues((MapVector) root.getVector(MAP), table.getRowCount(),
-        getMapValues(table.getValues(), MAP));
+    assertMapVectorValues(
+        (MapVector) root.getVector(MAP), table.getRowCount(), getMapValues(table.getValues(), MAP));
   }
 }
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java
index 5731f27c5b345..51394764e385c 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.adapter.jdbc.h2;

 import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues;
@@ -51,7 +50,6 @@
 import java.util.Arrays;
 import java.util.Calendar;
 import java.util.Collection;
-
 import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
 import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
 import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
@@ -82,8 +80,8 @@
 import org.junit.runners.Parameterized.Parameters;

 /**
- * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with null values for
- * H2 database.
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * null values for H2 database. */ @RunWith(Parameterized.class) public class JdbcToArrowNullTest extends AbstractJdbcToArrowTest { @@ -116,46 +114,67 @@ public JdbcToArrowNullTest(Table table) { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowNullTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with null values. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with null + * values. */ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + 
.setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); } @Test public void testJdbcSchemaMetadata() throws SQLException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -165,9 +184,10 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); @@ -176,7 +196,8 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { sqlToArrowTestNullValues(table.getVectors(), root, table.getRowCount()); break; case SELECTED_NULL_COLUMN: - sqlToArrowTestSelectedNullColumnsValues(table.getVectors(), root, table.getRowCount(), isIncludeMapVector); + sqlToArrowTestSelectedNullColumnsValues( + table.getVectors(), root, table.getRowCount(), isIncludeMapVector); break; case SELECTED_NULL_ROW: testAllVectorValues(root, isIncludeMapVector); @@ -190,62 +211,96 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { private void testAllVectorValues(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(), + assertBigIntVectorValues( + (BigIntVector) root.getVector(BIGINT), + table.getRowCount(), getLongValues(table.getValues(), BIGINT)); - assertTinyIntVectorValues((TinyIntVector) root.getVector(TINYINT), table.getRowCount(), + assertTinyIntVectorValues( + (TinyIntVector) root.getVector(TINYINT), + table.getRowCount(), getIntValues(table.getValues(), TINYINT)); - assertSmallIntVectorValues((SmallIntVector) root.getVector(SMALLINT), table.getRowCount(), + assertSmallIntVectorValues( + (SmallIntVector) root.getVector(SMALLINT), + table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BINARY), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BINARY), + table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BLOB), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BLOB), + table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CLOB), + table.getRowCount(), getCharArray(table.getValues(), CLOB)); - 
assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(VARCHAR), + table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); - assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CHAR), + table.getRowCount(), getCharArray(table.getValues(), CHAR)); - assertIntVectorValues((IntVector) root.getVector(INT), table.getRowCount(), - getIntValues(table.getValues(), INT)); + assertIntVectorValues( + (IntVector) root.getVector(INT), table.getRowCount(), getIntValues(table.getValues(), INT)); - assertBitVectorValues((BitVector) root.getVector(BIT), table.getRowCount(), - getIntValues(table.getValues(), BIT)); + assertBitVectorValues( + (BitVector) root.getVector(BIT), table.getRowCount(), getIntValues(table.getValues(), BIT)); - assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(), + assertBooleanVectorValues( + (BitVector) root.getVector(BOOL), + table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); - assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(), + assertDateVectorValues( + (DateDayVector) root.getVector(DATE), + table.getRowCount(), getIntValues(table.getValues(), DATE)); - assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), table.getRowCount(), + assertTimeVectorValues( + (TimeMilliVector) root.getVector(TIME), + table.getRowCount(), getLongValues(table.getValues(), TIME)); - assertTimeStampVectorValues((TimeStampVector) root.getVector(TIMESTAMP), table.getRowCount(), + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(TIMESTAMP), + table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); - assertDecimalVectorValues((DecimalVector) root.getVector(DECIMAL), table.getRowCount(), + assertDecimalVectorValues( + (DecimalVector) root.getVector(DECIMAL), + table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); - assertFloat8VectorValues((Float8Vector) root.getVector(DOUBLE), table.getRowCount(), + assertFloat8VectorValues( + (Float8Vector) root.getVector(DOUBLE), + table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); - assertFloat4VectorValues((Float4Vector) root.getVector(REAL), table.getRowCount(), + assertFloat4VectorValues( + (Float4Vector) root.getVector(REAL), + table.getRowCount(), getFloatValues(table.getValues(), REAL)); - assertListVectorValues((ListVector) root.getVector(LIST), table.getRowCount(), + assertListVectorValues( + (ListVector) root.getVector(LIST), + table.getRowCount(), getListValues(table.getValues(), LIST)); if (isIncludeMapVector) { - assertMapVectorValues((MapVector) root.getVector(MAP), table.getRowCount(), - getMapValues(table.getValues(), MAP)); + assertMapVectorValues( + (MapVector) root.getVector(MAP), + table.getRowCount(), + getMapValues(table.getValues(), MAP)); } } @@ -283,11 +338,11 @@ public void sqlToArrowTestNullValues(String[] vectors, VectorSchemaRoot root, in * @param vectors Vectors to test * @param root VectorSchemaRoot for test * @param rowCount number of rows - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. 
Jdbc type to 'map' + * mapping declared in configuration only manually */ - public void sqlToArrowTestSelectedNullColumnsValues(String[] vectors, VectorSchemaRoot root, int rowCount, - boolean isIncludeMapVector) { + public void sqlToArrowTestSelectedNullColumnsValues( + String[] vectors, VectorSchemaRoot root, int rowCount, boolean isIncludeMapVector) { assertNullValues((BigIntVector) root.getVector(vectors[0]), rowCount); assertNullValues((DecimalVector) root.getVector(vectors[1]), rowCount); assertNullValues((Float8Vector) root.getVector(vectors[2]), rowCount); @@ -306,5 +361,4 @@ public void sqlToArrowTestSelectedNullColumnsValues(String[] vectors, VectorSche assertNullValues((MapVector) root.getVector(vectors[14]), rowCount); } } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java index eebcbe64c0e0c..47713d9099da6 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static junit.framework.TestCase.assertTrue; @@ -24,7 +23,6 @@ import java.sql.SQLException; import java.util.Arrays; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; import org.apache.arrow.adapter.jdbc.Table; @@ -40,9 +38,7 @@ */ @RunWith(Parameterized.class) public class JdbcToArrowOptionalColumnsTest extends AbstractJdbcToArrowTest { - private static final String[] testFiles = { - "h2/test1_null_and_notnull.yml" - }; + private static final String[] testFiles = {"h2/test1_null_and_notnull.yml"}; /** * Constructor which populates the table object for each test iteration. @@ -57,36 +53,40 @@ public JdbcToArrowOptionalColumnsTest(Table table) { * Get the test data as a collection of Table objects for each test iteration. * * @return Collection of Table objects - * @throws SQLException on error + * @throws SQLException on error * @throws ClassNotFoundException on error - * @throws IOException on error + * @throws IOException on error */ @Parameterized.Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowOptionalColumnsTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable columns. + * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable + * columns. */ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); } /** - * This method calls the assert methods for various DataSets. We verify that a SQL `NULL` column becomes - * nullable in the VectorSchemaRoot, and that a SQL `NOT NULL` column becomes non-nullable. + * This method calls the assert methods for various DataSets. 
We verify that a SQL `NULL` column
+   * becomes nullable in the VectorSchemaRoot, and that a SQL `NOT NULL` column becomes
+   * non-nullable.
    *
    * @param root VectorSchemaRoot for test
-   * @param isIncludeMapVector is this dataset checks includes map column.
-   *     Jdbc type to 'map' mapping declared in configuration only manually
+   * @param isIncludeMapVector whether the checks for this dataset include the map column; the JDBC
+   *     type to 'map' mapping can only be declared manually in the configuration
    */
+  @Override
   public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) {
     JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root);

     assertTrue(root.getSchema().getFields().get(0).isNullable());
     assertFalse(root.getSchema().getFields().get(1).isNullable());
   }
-
 }
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
index 7641fa7f1659c..d290b9bf08960 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.adapter.jdbc.h2;

 import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.*;
@@ -29,7 +28,6 @@
 import java.util.Collection;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
-
 import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
 import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
 import org.apache.arrow.adapter.jdbc.JdbcToArrow;
@@ -64,8 +62,8 @@
 import org.junit.runners.Parameterized;

 /**
- * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with various data types
- * for H2 database using single test data file.
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with
+ * various data types for H2 database using single test data file.
  */
 @RunWith(Parameterized.class)
 public class JdbcToArrowTest extends AbstractJdbcToArrowTest {
@@ -92,52 +90,72 @@ public JdbcToArrowTest(Table table, boolean reuseVectorSchemaRoot) {
    * @throws IOException on error
    */
   @Parameterized.Parameters(name = "table = {0}, reuse batch = {1}")
-  public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException {
-    return Arrays.stream(prepareTestData(testFiles, JdbcToArrowTest.class)).flatMap(row -> Stream.of(new Object[] {row[0], true}, new Object[] {row[0], false})).collect(Collectors.toList());
+  public static Collection getTestData()
+      throws SQLException, ClassNotFoundException, IOException {
+    return Arrays.stream(prepareTestData(testFiles, JdbcToArrowTest.class))
+        .flatMap(row -> Stream.of(new Object[] {row[0], true}, new Object[] {row[0], false}))
+        .collect(Collectors.toList());
   }

   /**
-   * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with only one test data file.
+   * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with only one
+   * test data file.
*/ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); } @Test public void testJdbcSchemaMetadata() throws SQLException { Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); Schema schema = 
JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); } @@ -146,70 +164,105 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(), + assertBigIntVectorValues( + (BigIntVector) root.getVector(BIGINT), + table.getRowCount(), getLongValues(table.getValues(), BIGINT)); - assertTinyIntVectorValues((TinyIntVector) root.getVector(TINYINT), table.getRowCount(), + assertTinyIntVectorValues( + (TinyIntVector) root.getVector(TINYINT), + table.getRowCount(), getIntValues(table.getValues(), TINYINT)); - assertSmallIntVectorValues((SmallIntVector) root.getVector(SMALLINT), table.getRowCount(), + assertSmallIntVectorValues( + (SmallIntVector) root.getVector(SMALLINT), + table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BINARY), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BINARY), + table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BLOB), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BLOB), + table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CLOB), + table.getRowCount(), getCharArray(table.getValues(), CLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(VARCHAR), + table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); - assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CHAR), + table.getRowCount(), getCharArray(table.getValues(), CHAR)); - assertIntVectorValues((IntVector) root.getVector(INT), table.getRowCount(), - getIntValues(table.getValues(), INT)); + assertIntVectorValues( + (IntVector) root.getVector(INT), table.getRowCount(), getIntValues(table.getValues(), INT)); - assertBitVectorValues((BitVector) root.getVector(BIT), table.getRowCount(), - getIntValues(table.getValues(), BIT)); + assertBitVectorValues( + (BitVector) root.getVector(BIT), table.getRowCount(), getIntValues(table.getValues(), BIT)); - assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(), + assertBooleanVectorValues( + (BitVector) root.getVector(BOOL), + table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); - assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(), + assertDateVectorValues( + (DateDayVector) root.getVector(DATE), + table.getRowCount(), getIntValues(table.getValues(), DATE)); - assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), 
table.getRowCount(), + assertTimeVectorValues( + (TimeMilliVector) root.getVector(TIME), + table.getRowCount(), getLongValues(table.getValues(), TIME)); - assertTimeStampVectorValues((TimeStampVector) root.getVector(TIMESTAMP), table.getRowCount(), + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(TIMESTAMP), + table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); - assertDecimalVectorValues((DecimalVector) root.getVector(DECIMAL), table.getRowCount(), + assertDecimalVectorValues( + (DecimalVector) root.getVector(DECIMAL), + table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); - assertFloat8VectorValues((Float8Vector) root.getVector(DOUBLE), table.getRowCount(), + assertFloat8VectorValues( + (Float8Vector) root.getVector(DOUBLE), + table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); - assertFloat4VectorValues((Float4Vector) root.getVector(REAL), table.getRowCount(), + assertFloat4VectorValues( + (Float4Vector) root.getVector(REAL), + table.getRowCount(), getFloatValues(table.getValues(), REAL)); assertNullVectorValues((NullVector) root.getVector(NULL), table.getRowCount()); - assertListVectorValues((ListVector) root.getVector(LIST), table.getRowCount(), + assertListVectorValues( + (ListVector) root.getVector(LIST), + table.getRowCount(), getListValues(table.getValues(), LIST)); if (isIncludeMapVector) { - assertMapVectorValues((MapVector) root.getVector(MAP), table.getRowCount(), - getMapValues(table.getValues(), MAP)); + assertMapVectorValues( + (MapVector) root.getVector(MAP), + table.getRowCount(), + getMapValues(table.getValues(), MAP)); } } @@ -219,11 +272,12 @@ public void runLargeNumberOfRows() throws IOException, SQLException { int x = 0; final int targetRows = 600000; ResultSet rs = ResultSetUtility.generateBasicResultSet(targetRows); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { while (iter.hasNext()) { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java index 462a75da5143a..c4930c3ab6017 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues; @@ -28,7 +27,6 @@ import java.util.Calendar; import java.util.Collection; import java.util.TimeZone; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -47,10 +45,9 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with TimeZone based Date, - * Time and Timestamp datatypes for H2 database. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * TimeZone based Date, Time and Timestamp datatypes for H2 database. */ - @RunWith(Parameterized.class) public class JdbcToArrowTimeZoneTest extends AbstractJdbcToArrowTest { @@ -94,39 +91,60 @@ public JdbcToArrowTimeZoneTest(Table table) { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowTimeZoneTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with TimeZone based Date, - * Time and Timestamp datatype. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with TimeZone + * based Date, Time and Timestamp datatype. */ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), false); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder( + testDataSets( + sqlToArrow( + conn, + table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build()), false); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder( + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build()), false); + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))) + .build()), + false); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), + 
Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))) + .build()), + false); } @Test public void testJdbcSchemaMetadata() throws SQLException { Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true).build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true).build(); ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -136,9 +154,10 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); @@ -146,19 +165,25 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { case EST_DATE: case GMT_DATE: case PST_DATE: - assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length, + assertDateVectorValues( + (DateDayVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case EST_TIME: case GMT_TIME: case PST_TIME: - assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeVectorValues( + (TimeMilliVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case EST_TIMESTAMP: case GMT_TIMESTAMP: case PST_TIMESTAMP: - assertTimeStampVectorValues((TimeStampVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; default: @@ -166,5 +191,4 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { break; } } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java index 1d7e2760f843e..caa1c1d971adb 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues; @@ -42,7 +41,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.List; - import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; import org.apache.arrow.adapter.jdbc.JdbcToArrow; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; @@ -91,14 +89,15 @@ public JdbcToArrowVectorIteratorTest(Table table, boolean reuseVectorSchemaRoot) @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(3) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(3) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery(table.getQuery()), config); validate(iterator); } @@ -106,27 +105,28 @@ public void testJdbcToArrowValues() throws SQLException, IOException { @Test public void testVectorSchemaRootReuse() throws SQLException, IOException { Integer[][] intValues = { - {101, 102, 103}, - {104, null, null}, - {107, 108, 109}, - {110} + {101, 102, 103}, + {104, null, null}, + {107, 108, 109}, + {110} }; Integer[][][] listValues = { - {{1, 2, 3}, {1, 2}, {1}}, - {{2, 3, 4}, {2, 3}, {2}}, - {{3, 4, 5}, {3, 4}, {3}}, - {{}} + {{1, 2, 3}, {1, 2}, {1}}, + {{2, 3, 4}, {2, 3}, {2}}, + {{3, 4, 5}, {3, 4}, {3}}, + {{}} }; - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(3) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery(table.getQuery()), config); int batchCount = 0; VectorSchemaRoot prev = null; @@ -178,14 +178,15 @@ public void testVectorSchemaRootReuse() throws SQLException, IOException { @Test public void testJdbcToArrowValuesNoLimit() throws SQLException, IOException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + 
JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery(table.getQuery()), config); validate(iterator); } @@ -195,12 +196,12 @@ public void testTimeStampConsumer() throws SQLException, IOException { final String sql = "select timestamp_field11 from table1"; // first experiment, with calendar and time zone. - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(3) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); assertNotNull(config.getCalendar()); try (ArrowVectorIterator iterator = @@ -213,16 +214,16 @@ public void testTimeStampConsumer() throws SQLException, IOException { } // second experiment, without calendar and time zone. - config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - null) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), null) + .setTargetBatchSize(3) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); assertNull(config.getCalendar()); try (ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) { + JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) { VectorSchemaRoot root = iterator.next(); assertEquals(1, root.getFieldVectors().size()); @@ -278,24 +279,40 @@ private void validate(ArrowVectorIterator iterator) throws SQLException, IOExcep float8Vectors.add((Float8Vector) root.getVector(DOUBLE)); listVectors.add((ListVector) root.getVector(LIST)); } - assertBigIntVectorValues(bigIntVectors, table.getRowCount(), getLongValues(table.getValues(), BIGINT)); - assertTinyIntVectorValues(tinyIntVectors, table.getRowCount(), getIntValues(table.getValues(), TINYINT)); + assertBigIntVectorValues( + bigIntVectors, table.getRowCount(), getLongValues(table.getValues(), BIGINT)); + assertTinyIntVectorValues( + tinyIntVectors, table.getRowCount(), getIntValues(table.getValues(), TINYINT)); assertIntVectorValues(intVectors, table.getRowCount(), getIntValues(table.getValues(), INT)); - assertSmallIntVectorValues(smallIntVectors, table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); - assertBinaryVectorValues(vectorsForBinary, table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); - assertBinaryVectorValues(vectorsForBlob, table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); - assertVarCharVectorValues(vectorsForClob, table.getRowCount(), getCharArray(table.getValues(), CLOB)); - assertVarCharVectorValues(vectorsForVarChar, table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); - assertVarCharVectorValues(vectorsForChar, table.getRowCount(), getCharArray(table.getValues(), CHAR)); + assertSmallIntVectorValues( + smallIntVectors, table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); + assertBinaryVectorValues( + vectorsForBinary, table.getRowCount(), getBinaryValues(table.getValues(), 
BINARY)); + assertBinaryVectorValues( + vectorsForBlob, table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); + assertVarCharVectorValues( + vectorsForClob, table.getRowCount(), getCharArray(table.getValues(), CLOB)); + assertVarCharVectorValues( + vectorsForVarChar, table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); + assertVarCharVectorValues( + vectorsForChar, table.getRowCount(), getCharArray(table.getValues(), CHAR)); assertBitVectorValues(vectorsForBit, table.getRowCount(), getIntValues(table.getValues(), BIT)); - assertBooleanVectorValues(vectorsForBool, table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); - assertDateDayVectorValues(dateDayVectors, table.getRowCount(), getLongValues(table.getValues(), DATE)); - assertTimeMilliVectorValues(timeMilliVectors, table.getRowCount(), getLongValues(table.getValues(), TIME)); - assertTimeStampVectorValues(timeStampVectors, table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); - assertDecimalVectorValues(decimalVectors, table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); - assertFloat4VectorValues(float4Vectors, table.getRowCount(), getFloatValues(table.getValues(), REAL)); - assertFloat8VectorValues(float8Vectors, table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); - assertListVectorValues(listVectors, table.getRowCount(), getListValues(table.getValues(), LIST)); + assertBooleanVectorValues( + vectorsForBool, table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); + assertDateDayVectorValues( + dateDayVectors, table.getRowCount(), getLongValues(table.getValues(), DATE)); + assertTimeMilliVectorValues( + timeMilliVectors, table.getRowCount(), getLongValues(table.getValues(), TIME)); + assertTimeStampVectorValues( + timeStampVectors, table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); + assertDecimalVectorValues( + decimalVectors, table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); + assertFloat4VectorValues( + float4Vectors, table.getRowCount(), getFloatValues(table.getValues(), REAL)); + assertFloat8VectorValues( + float8Vectors, table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); + assertListVectorValues( + listVectors, table.getRowCount(), getListValues(table.getValues(), LIST)); roots.forEach(root -> root.close()); } @@ -324,7 +341,8 @@ private void assertFloat4VectorValues(List vectors, int rowCount, } } - private void assertDecimalVectorValues(List vectors, int rowCount, BigDecimal[] values) { + private void assertDecimalVectorValues( + List vectors, int rowCount, BigDecimal[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -337,7 +355,8 @@ private void assertDecimalVectorValues(List vectors, int rowCount } } - private void assertTimeStampVectorValues(List vectors, int rowCount, Long[] values) { + private void assertTimeStampVectorValues( + List vectors, int rowCount, Long[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -349,7 +368,8 @@ private void assertTimeStampVectorValues(List vectors, int rowC } } - private void assertTimeMilliVectorValues(List vectors, int rowCount, Long[] values) { + private void assertTimeMilliVectorValues( + List vectors, int rowCount, Long[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -397,7 +417,8 @@ private void 
assertBooleanVectorValues(List vectors, int rowCount, Bo } } - private void assertVarCharVectorValues(List vectors, int rowCount, byte[][] values) { + private void assertVarCharVectorValues( + List vectors, int rowCount, byte[][] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -409,7 +430,8 @@ private void assertVarCharVectorValues(List vectors, int rowCount } } - private void assertBinaryVectorValues(List vectors, int rowCount, byte[][] values) { + private void assertBinaryVectorValues( + List vectors, int rowCount, byte[][] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -421,7 +443,8 @@ private void assertBinaryVectorValues(List vectors, int rowCoun } } - private void assertSmallIntVectorValues(List vectors, int rowCount, Integer[] values) { + private void assertSmallIntVectorValues( + List vectors, int rowCount, Integer[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -433,7 +456,8 @@ private void assertSmallIntVectorValues(List vectors, int rowCou } } - private void assertTinyIntVectorValues(List vectors, int rowCount, Integer[] values) { + private void assertTinyIntVectorValues( + List vectors, int rowCount, Integer[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -474,7 +498,8 @@ private void assertIntVectorValues(List vectors, int rowCount, Intege } } - public static void assertListVectorValues(List vectors, int rowCount, Integer[][] values) { + public static void assertListVectorValues( + List vectors, int rowCount, Integer[][] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -492,12 +517,11 @@ public static void assertListVectorValues(List vectors, int rowCount } } - /** - * Runs a simple query, and encapsulates the result into a field vector. - */ + /** Runs a simple query, and encapsulates the result into a field vector. 
*/ private FieldVector getQueryResult(JdbcToArrowConfig config) throws SQLException, IOException { - ArrowVectorIterator iterator = JdbcToArrow.sqlToArrowVectorIterator( - conn.createStatement().executeQuery("select real_field8 from table1"), config); + ArrowVectorIterator iterator = + JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery("select real_field8 from table1"), config); VectorSchemaRoot root = iterator.next(); @@ -513,10 +537,11 @@ private FieldVector getQueryResult(JdbcToArrowConfig config) throws SQLException @Test public void testJdbcToArrowCustomTypeConversion() throws SQLException, IOException { - JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()).setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP); + JdbcToArrowConfigBuilder builder = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP); // first experiment, using default type converter JdbcToArrowConfig config = builder.build(); @@ -527,15 +552,16 @@ public void testJdbcToArrowCustomTypeConversion() throws SQLException, IOExcepti } // second experiment, using customized type converter - builder.setJdbcToArrowTypeConverter((fieldInfo) -> { - switch (fieldInfo.getJdbcType()) { - case Types.REAL: - // this is different from the default type converter - return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - default: - return null; - } - }); + builder.setJdbcToArrowTypeConverter( + (fieldInfo) -> { + switch (fieldInfo.getJdbcType()) { + case Types.REAL: + // this is different from the default type converter + return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); + default: + return null; + } + }); config = builder.build(); try (FieldVector vector = getQueryResult(config)) { diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index f6aadca6de4d3..7df08e1a98b36 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -24,9 +24,13 @@ jar Arrow Orc Adapter (Experimental/Contrib)A JNI wrapper for the C++ ORC reader implementation. + ../../../cpp/release-build/ + dev/checkstyle/checkstyle-spotless.xml + none + org.apache.arrow @@ -46,7 +50,7 @@ org.immutables - value + value-annotations org.apache.orc @@ -134,5 +138,22 @@ + + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + + + + org.apache.arrow:arrow-format + + + + + + diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java index 716a13876608c..faf48e19445ae 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java @@ -14,12 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; -/** - * Metadata about Vectors/Arrays that is passed via JNI interface. - */ +/** Metadata about Vectors/Arrays that is passed via JNI interface. */ class OrcFieldNode { private final int length; @@ -27,6 +24,7 @@ class OrcFieldNode { /** * Construct a new instance. 
+ * * @param length the number of values written. * @param nullCount the number of null values. */ diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java index d61799e990f77..692b0c061839c 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.File; @@ -25,24 +24,21 @@ import java.nio.file.StandardCopyOption; import java.util.Locale; -/** - * Helper class for JNI related operations. - */ +/** Helper class for JNI related operations. */ class OrcJniUtils { private static final String LIBRARY_NAME = "arrow_orc_jni"; private static boolean isLoaded = false; - private OrcJniUtils() { - } + private OrcJniUtils() {} - static void loadOrcAdapterLibraryFromJar() - throws IOException, IllegalAccessException { + static void loadOrcAdapterLibraryFromJar() throws IOException, IllegalAccessException { synchronized (OrcJniUtils.class) { if (!isLoaded) { final String libraryToLoad = LIBRARY_NAME + "/" + getNormalizedArch() + "/" + System.mapLibraryName(LIBRARY_NAME); final File libraryFile = - moveFileFromJarToTemp(System.getProperty("java.io.tmpdir"), libraryToLoad, LIBRARY_NAME); + moveFileFromJarToTemp( + System.getProperty("java.io.tmpdir"), libraryToLoad, LIBRARY_NAME); System.load(libraryFile.getAbsolutePath()); isLoaded = true; } @@ -64,11 +60,11 @@ private static String getNormalizedArch() { return arch; } - private static File moveFileFromJarToTemp(final String tmpDir, String libraryToLoad, String libraryName) - throws IOException { + private static File moveFileFromJarToTemp( + final String tmpDir, String libraryToLoad, String libraryName) throws IOException { final File temp = File.createTempFile(tmpDir, libraryName); - try (final InputStream is = OrcReaderJniWrapper.class.getClassLoader() - .getResourceAsStream(libraryToLoad)) { + try (final InputStream is = + OrcReaderJniWrapper.class.getClassLoader().getResourceAsStream(libraryToLoad)) { if (is == null) { throw new FileNotFoundException(libraryToLoad); } else { diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java index 473e8314243b1..70f2a655654c6 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java @@ -14,12 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; -/** - * Wrapper for orc memory allocated by native code. - */ +/** Wrapper for orc memory allocated by native code. */ class OrcMemoryJniWrapper implements AutoCloseable { private final long nativeInstanceId; @@ -32,6 +29,7 @@ class OrcMemoryJniWrapper implements AutoCloseable { /** * Construct a new instance. + * * @param nativeInstanceId unique id of the underlying memory. * @param memoryAddress starting memory address of the underlying memory. * @param size size of the valid data. 
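The wrappers in this adapter only manage native memory and JNI handles; the public entry point is the OrcReader class whose diff follows. As a rough end-to-end sketch of how the adapter is typically driven (the file path is a placeholder, and the nextStripeReader(batchSize) accessor on OrcReader is inferred from the JNI wrapper documented later in this diff, so treat the exact signatures as assumptions rather than as the committed API):

import org.apache.arrow.adapter.orc.OrcReader;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ArrowReader;

public class OrcReadSketch {
  public static void main(String[] args) throws Exception {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
        // Opens the file through the JNI layer (OrcReaderJniWrapper.open under the hood).
        OrcReader reader = new OrcReader("/tmp/example.orc", allocator)) {
      // Each stripe is surfaced as an ArrowReader; load batches of up to 1024 rows.
      // (A file with zero stripes, where this may return null, is not handled here.)
      try (ArrowReader stripe = reader.nextStripeReader(1024)) {
        while (stripe.loadNextBatch()) {
          VectorSchemaRoot root = stripe.getVectorSchemaRoot();
          System.out.println("rows in batch: " + root.getRowCount());
        }
      }
    }
  }
}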
@@ -46,6 +44,7 @@ class OrcMemoryJniWrapper { /** * Return the size of underlying chunk of memory that has valid data. + * * @return valid data size */ long getSize() { @@ -54,6 +53,7 @@ long getSize() { /** * Return the size of underlying chunk of memory managed by this OrcMemoryJniWrapper. + * * @return underlying memory size */ long getCapacity() { @@ -62,6 +62,7 @@ long getCapacity() { /** * Return the memory address of underlying chunk of memory. + * * @return memory address */ long getMemoryAddress() { diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java index 648e17e9c374c..ca9b44e7e8123 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java @@ -14,44 +14,42 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.IOException; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.ipc.ArrowReader; /** - * Orc Reader that allow accessing orc stripes in Orc file. - * This orc reader basically acts like an ArrowReader iterator that - * iterate over orc stripes. Each stripe will be accessed via an - * ArrowReader. + * Orc Reader that allows accessing orc stripes in an Orc file. This orc reader basically acts like + * an ArrowReader iterator that iterates over orc stripes. Each stripe will be accessed via an + * ArrowReader. */ public class OrcReader implements AutoCloseable { private final OrcReaderJniWrapper jniWrapper; private BufferAllocator allocator; - /** - * reference to native reader instance. - */ + /** Reference to the native reader instance. */ private final long nativeInstanceId; /** * Create an OrcReader that iterates over orc stripes. + * * @param filePath file path to the target file; currently only local files are supported. * @param allocator allocator provided to ArrowReader. * @throws IOException if the file cannot be found or opened */ - public OrcReader(String filePath, BufferAllocator allocator) throws IOException, IllegalAccessException { + public OrcReader(String filePath, BufferAllocator allocator) + throws IOException, IllegalAccessException { this.allocator = allocator; this.jniWrapper = OrcReaderJniWrapper.getInstance(); this.nativeInstanceId = jniWrapper.open(filePath); } /** - * Seek to designated row. Invoke NextStripeReader() after seek - * will return stripe reader starting from designated row. + * Seek to the designated row. Invoking NextStripeReader() after seek will return a stripe reader + * starting from the designated row. + * * @param rowNumber the row number to seek to * @return true if the seek operation succeeded */ diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java index ff449c343c4e7..be57485005fbf 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java @@ -14,14 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.IOException; -/** - * JNI wrapper for Orc reader. - */ +/** JNI wrapper for Orc reader.
*/ class OrcReaderJniWrapper { private static volatile OrcReaderJniWrapper INSTANCE; @@ -41,21 +38,24 @@ static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessExcept /** * Construct a orc file reader over the target file. + * * @param fileName absolute file path of target file - * @return id of the orc reader instance if file opened successfully, - * otherwise return error code * -1. + * @return id of the orc reader instance if file opened successfully, otherwise return error code + * * -1. */ native long open(String fileName); /** * Release resources associated with designated reader instance. + * * @param readerId id of the reader instance. */ native void close(long readerId); /** - * Seek to designated row. Invoke nextStripeReader() after seek - * will return id of stripe reader starting from designated row. + * Seek to designated row. Invoke nextStripeReader() after seek will return id of stripe reader + * starting from designated row. + * * @param readerId id of the reader instance * @param rowNumber the rows number to seek * @return true if seek operation is succeeded @@ -64,6 +64,7 @@ static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessExcept /** * The number of stripes in the file. + * * @param readerId id of the reader instance * @return number of stripes */ @@ -71,6 +72,7 @@ static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessExcept /** * Get a stripe level ArrowReader with specified batchSize in each record batch. + * * @param readerId id of the reader instance * @param batchSize the number of rows loaded on each iteration * @return id of the stripe reader instance. diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java index a006cacab98f2..f78898df2205d 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.util.Arrays; import java.util.List; -/** - * Wrapper for record batch meta and native memory. - */ +/** Wrapper for record batch meta and native memory. */ class OrcRecordBatch { final int length; - /** - * Nodes correspond to the pre-ordered flattened logical schema. - */ + /** Nodes correspond to the pre-ordered flattened logical schema. */ final List nodes; final List buffers; /** * Construct a new instance. + * * @param length number of records included in current batch * @param nodes meta data for each fields * @param buffers buffers for underlying data diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java index fdec337e85d39..38233a0493bef 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.orc; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OwnershipTransferResult; @@ -26,8 +24,8 @@ import org.apache.arrow.util.Preconditions; /** - * A simple reference manager implementation for memory allocated by native code. - * The underlying memory will be released when reference count reach zero. + * A simple reference manager implementation for memory allocated by native code. The underlying + * memory will be released when reference count reach zero. */ public class OrcReferenceManager implements ReferenceManager { private final AtomicInteger bufRefCnt = new AtomicInteger(0); @@ -50,8 +48,8 @@ public boolean release() { @Override public boolean release(int decrement) { - Preconditions.checkState(decrement >= 1, - "ref count decrement should be greater than or equal to 1"); + Preconditions.checkState( + decrement >= 1, "ref count decrement should be greater than or equal to 1"); // decrement the ref count final int refCnt; synchronized (this) { @@ -89,18 +87,21 @@ public ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length) { final long derivedBufferAddress = sourceBuffer.memoryAddress() + index; // create new ArrowBuf - final ArrowBuf derivedBuf = new ArrowBuf( + final ArrowBuf derivedBuf = + new ArrowBuf( this, null, length, // length (in bytes) in the underlying memory chunk for this new ArrowBuf - derivedBufferAddress // starting byte address in the underlying memory for this new ArrowBuf, + derivedBufferAddress // starting byte address in the underlying memory for this new + // ArrowBuf, ); return derivedBuf; } @Override - public OwnershipTransferResult transferOwnership(ArrowBuf sourceBuffer, BufferAllocator targetAllocator) { + public OwnershipTransferResult transferOwnership( + ArrowBuf sourceBuffer, BufferAllocator targetAllocator) { throw new UnsupportedOperationException(); } diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java index 484296d92e039..52f5cf429a48d 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.IOException; import java.util.ArrayList; import java.util.stream.Collectors; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.ipc.ArrowReader; @@ -33,19 +31,16 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -/** - * Orc stripe that load data into ArrowRecordBatch. - */ +/** Orc stripe that load data into ArrowRecordBatch. */ public class OrcStripeReader extends ArrowReader { - /** - * reference to native stripe reader instance. - */ + /** reference to native stripe reader instance. */ private final long nativeInstanceId; /** * Construct a new instance. 
- * @param nativeInstanceId nativeInstanceId of the stripe reader instance, obtained by - * calling nextStripeReader from OrcReaderJniWrapper + * + * @param nativeInstanceId nativeInstanceId of the stripe reader instance, obtained by calling + * nextStripeReader from OrcReaderJniWrapper * @param allocator memory allocator for accounting. */ OrcStripeReader(long nativeInstanceId, BufferAllocator allocator) { @@ -62,18 +57,20 @@ public boolean loadNextBatch() throws IOException { ArrayList buffers = new ArrayList<>(); for (OrcMemoryJniWrapper buffer : recordBatch.buffers) { - buffers.add(new ArrowBuf( + buffers.add( + new ArrowBuf( new OrcReferenceManager(buffer), null, (int) buffer.getSize(), buffer.getMemoryAddress())); } - loadRecordBatch(new ArrowRecordBatch( + loadRecordBatch( + new ArrowRecordBatch( recordBatch.length, recordBatch.nodes.stream() - .map(buf -> new ArrowFieldNode(buf.getLength(), buf.getNullCount())) - .collect(Collectors.toList()), + .map(buf -> new ArrowFieldNode(buf.getLength(), buf.getNullCount())) + .collect(Collectors.toList()), buffers)); return true; } @@ -83,7 +80,6 @@ public long bytesRead() { return 0; } - @Override protected void closeReadSource() throws IOException { OrcStripeReaderJniWrapper.close(nativeInstanceId); @@ -94,9 +90,8 @@ protected Schema readSchema() throws IOException { byte[] schemaBytes = OrcStripeReaderJniWrapper.getSchema(nativeInstanceId); try (MessageChannelReader schemaReader = - new MessageChannelReader( - new ReadChannel( - new ByteArrayReadableSeekableByteChannel(schemaBytes)), allocator)) { + new MessageChannelReader( + new ReadChannel(new ByteArrayReadableSeekableByteChannel(schemaBytes)), allocator)) { MessageResult result = schemaReader.readNext(); if (result == null) { diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java index 1dd96986108b4..e7b691087fb96 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java @@ -14,16 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; -/** - * JNI wrapper for orc stripe reader. - */ +/** JNI wrapper for orc stripe reader. */ class OrcStripeReaderJniWrapper { /** * Get the schema of current stripe. + * * @param readerId id of the stripe reader instance. * @return serialized schema. */ @@ -31,14 +29,15 @@ class OrcStripeReaderJniWrapper { /** * Load next record batch. + * * @param readerId id of the stripe reader instance. - * @return loaded record batch, return null when reached - * the end of current stripe. + * @return loaded record batch, return null when reached the end of current stripe. */ static native OrcRecordBatch next(long readerId); /** * Release resources of underlying reader. + * * @param readerId id of the stripe reader instance. 
*/ static native void close(long readerId); diff --git a/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java b/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java index 4153a35a61c67..17098806be72a 100644 --- a/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java +++ b/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import static org.junit.Assert.assertEquals; @@ -24,8 +23,6 @@ import java.io.File; import java.nio.charset.StandardCharsets; import java.util.List; - - import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.IntVector; @@ -45,11 +42,9 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; - public class OrcReaderTest { - @Rule - public TemporaryFolder testFolder = new TemporaryFolder(); + @Rule public TemporaryFolder testFolder = new TemporaryFolder(); private static final int MAX_ALLOCATION = 8 * 1024; private static RootAllocator allocator; @@ -64,8 +59,10 @@ public void testOrcJniReader() throws Exception { TypeDescription schema = TypeDescription.fromString("struct"); File testFile = new File(testFolder.getRoot(), "test-orc"); - Writer writer = OrcFile.createWriter(new Path(testFile.getAbsolutePath()), - OrcFile.writerOptions(new Configuration()).setSchema(schema)); + Writer writer = + OrcFile.createWriter( + new Path(testFile.getAbsolutePath()), + OrcFile.writerOptions(new Configuration()).setSchema(schema)); VectorizedRowBatch batch = schema.createRowBatch(); LongColumnVector longColumnVector = (LongColumnVector) batch.cols[0]; BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[1]; diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 92fa5c8553505..5984cce766d9e 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -20,6 +20,11 @@ Arrow Algorithms (Experimental/Contrib) A collection of algorithms for working with ValueVectors. + + dev/checkstyle/checkstyle-spotless.xml + none + + org.apache.arrow @@ -44,9 +49,7 @@ org.immutables - value + value-annotations - - diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java index 8811e43d3d08d..e9364b2a85b7b 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import org.apache.arrow.memory.ArrowBuf; @@ -26,18 +25,18 @@ import org.apache.arrow.vector.compare.RangeEqualsVisitor; import org.apache.arrow.vector.util.DataSizeRoundingUtil; -/** - * Utilities for vector deduplication. - */ +/** Utilities for vector deduplication. */ class DeduplicationUtils { /** * Gets the start positions of the first distinct values in a vector. + * * @param vector the target vector. * @param runStarts the bit set to hold the start positions. * @param vector type. 
*/ - public static void populateRunStartIndicators(V vector, ArrowBuf runStarts) { + public static void populateRunStartIndicators( + V vector, ArrowBuf runStarts) { int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount()); Preconditions.checkArgument(runStarts.capacity() >= bufSize); runStarts.setZero(0, bufSize); @@ -55,6 +54,7 @@ public static void populateRunStartIndicators(V vector, /** * Gets the run lengths, given the start positions. + * * @param runStarts the bit set for start positions. * @param runLengths the run length vector to populate. * @param valueCount the number of values in the bit set. @@ -76,15 +76,15 @@ public static void populateRunLengths(ArrowBuf runStarts, IntVector runLengths, } /** - * Gets distinct values from the input vector by removing adjacent - * duplicated values. + * Gets distinct values from the input vector by removing adjacent duplicated values. + * * @param indicators the bit set containing the start positions of distinct values. * @param inputVector the input vector. * @param outputVector the output vector. * @param vector type. */ public static void populateDeduplicatedValues( - ArrowBuf indicators, V inputVector, V outputVector) { + ArrowBuf indicators, V inputVector, V outputVector) { int dstIdx = 0; for (int srcIdx = 0; srcIdx < inputVector.getValueCount(); srcIdx++) { if (BitVectorHelper.get(indicators, srcIdx) != 0) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java index 5ef03cbe4a734..4e49de14f5956 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import org.apache.arrow.memory.ArrowBuf; @@ -26,29 +25,28 @@ import org.apache.arrow.vector.util.DataSizeRoundingUtil; /** - * Remove adjacent equal elements from a vector. - * If the vector is sorted, it removes all duplicated values in the vector. + * Remove adjacent equal elements from a vector. If the vector is sorted, it removes all duplicated + * values in the vector. + * * @param vector type. */ public class VectorRunDeduplicator implements AutoCloseable { /** - * Bit set for distinct values. - * If the value at some index is not equal to the previous value, - * its bit is set to 1, otherwise its bit is set to 0. + * Bit set for distinct values. If the value at some index is not equal to the previous value, its + * bit is set to 1, otherwise its bit is set to 0. */ private ArrowBuf distinctValueBuffer; - /** - * The vector to deduplicate. - */ + /** The vector to deduplicate. */ private final V vector; private final BufferAllocator allocator; /** * Constructs a vector run deduplicator for a given vector. - * @param vector the vector to deduplicate. Ownership is NOT taken. + * + * @param vector the vector to deduplicate. Ownership is NOT taken. * @param allocator the allocator used for allocating buffers for start indices. */ public VectorRunDeduplicator(V vector, BufferAllocator allocator) { @@ -65,17 +63,20 @@ private void createDistinctValueBuffer() { /** * Gets the number of values which are different from their predecessor. + * * @return the run count. 
*/ public int getRunCount() { if (distinctValueBuffer == null) { createDistinctValueBuffer(); } - return vector.getValueCount() - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount()); + return vector.getValueCount() + - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount()); } /** * Gets the vector with deduplicated adjacent values removed. + * * @param outVector the output vector. */ public void populateDeduplicatedValues(V outVector) { @@ -88,6 +89,7 @@ public void populateDeduplicatedValues(V outVector) { /** * Gets the length of each distinct value. + * * @param lengthVector the vector for holding length values. */ public void populateRunLengths(IntVector lengthVector) { @@ -95,7 +97,8 @@ public void populateRunLengths(IntVector lengthVector) { createDistinctValueBuffer(); } - DeduplicationUtils.populateRunLengths(distinctValueBuffer, lengthVector, vector.getValueCount()); + DeduplicationUtils.populateRunLengths( + distinctValueBuffer, lengthVector, vector.getValueCount()); } @Override diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java index 398368d1fc612..88c4e4dc65450 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java @@ -14,33 +14,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.ValueVector; /** - * A dictionary builder is intended for the scenario frequently encountered in practice: - * the dictionary is not known a priori, so it is generated dynamically. - * In particular, when a new value arrives, it is tested to check if it is already - * in the dictionary. If so, it is simply neglected, otherwise, it is added to the dictionary. - *

- * <p>
- * The dictionary builder is intended to build a single dictionary.
- * So it cannot be used for different dictionaries.
- * <p>
+ * A dictionary builder is intended for the scenario frequently encountered in practice: the
+ * dictionary is not known a priori, so it is generated dynamically. In particular, when a new value
+ * arrives, it is tested to check if it is already in the dictionary. If so, it is simply neglected,
+ * otherwise, it is added to the dictionary.
+ *
+ * <p>The dictionary builder is intended to build a single dictionary. So it cannot be used for
+ * different dictionaries.
+ *
 * <p>
 * Below gives the sample code for using the dictionary builder
+ *
 * <pre>{@code
 * DictionaryBuilder dictionaryBuilder = ...
 * ...
 * dictionaryBuild.addValue(newValue);
 * ...
 * }</pre>
- * <p>
- *
- * With the above code, the dictionary vector will be populated,
- * and it can be retrieved by the {@link DictionaryBuilder#getDictionary()} method.
- * After that, dictionary encoding can proceed with the populated dictionary..
- * <p>
+ *
+ * <p>
With the above code, the dictionary vector will be populated, and it can be retrieved by the + * {@link DictionaryBuilder#getDictionary()} method. After that, dictionary encoding can proceed + * with the populated dictionary.. * * @param the dictionary vector type. */ @@ -58,7 +56,7 @@ public interface DictionaryBuilder { * Try to add an element from the target vector to the dictionary. * * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. + * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. */ int addValue(V targetVector, int targetIndex); diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java index cda7b3bf9540e..16e27c3a23e72 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.BaseIntVector; @@ -22,8 +21,9 @@ /** * A dictionary encoder translates one vector into another one based on a dictionary vector. - * According to Arrow specification, the encoded vector must be an integer based vector, which - * is the index of the original vector element in the dictionary. + * According to Arrow specification, the encoded vector must be an integer based vector, which is + * the index of the original vector element in the dictionary. + * * @param type of the encoded vector. * @param type of the vector to encode. It is also the type of the dictionary vector. */ @@ -31,9 +31,10 @@ public interface DictionaryEncoder the dictionary vector type. */ -public class HashTableBasedDictionaryBuilder implements DictionaryBuilder { +public class HashTableBasedDictionaryBuilder + implements DictionaryBuilder { - /** - * The dictionary to be built. - */ + /** The dictionary to be built. */ private final V dictionary; - /** - * If null should be encoded. - */ + /** If null should be encoded. */ private final boolean encodeNull; /** - * The hash map for distinct dictionary entries. - * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary. + * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element, + * whereas the value is the index in the dictionary. */ private HashMap hashMap = new HashMap<>(); - /** - * The hasher used for calculating the hash code. - */ + /** The hasher used for calculating the hash code. */ private final ArrowBufHasher hasher; - /** - * Next pointer to try to add to the hash table. - */ + /** Next pointer to try to add to the hash table. */ private ArrowBufPointer nextPointer; /** @@ -83,7 +73,7 @@ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull) { * * @param dictionary the dictionary to populate. * @param encodeNull if null values should be added to the dictionary. - * @param hasher the hasher used to compute the hash code. + * @param hasher the hasher used to compute the hash code. 
*/ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull, ArrowBufHasher hasher) { this.dictionary = dictionary; @@ -125,7 +115,7 @@ public int addValues(V targetVector) { * Try to add an element from the target vector to the dictionary. * * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. + * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. */ @Override diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java index bea1a784c3d6a..ac7a7d32bf597 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import java.util.HashMap; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.memory.util.hash.SimpleHasher; @@ -27,43 +25,35 @@ /** * Dictionary encoder based on hash table. + * * @param encoded vector type. * @param decoded vector type, which is also the dictionary type. */ public class HashTableDictionaryEncoder implements DictionaryEncoder { - /** - * The dictionary for encoding/decoding. - * It must be sorted. - */ + /** The dictionary for encoding/decoding. It must be sorted. */ private final D dictionary; - /** - * The hasher used to compute the hash code. - */ + /** The hasher used to compute the hash code. */ private final ArrowBufHasher hasher; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. */ private final boolean encodeNull; /** - * The hash map for distinct dictionary entries. - * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary. + * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element, + * whereas the value is the index in the dictionary. */ private HashMap hashMap = new HashMap<>(); - /** - * The pointer used to probe each element to encode. - */ + /** The pointer used to probe each element to encode. */ private ArrowBufPointer reusablePointer; /** * Constructs a dictionary encoder. - * @param dictionary the dictionary. * + * @param dictionary the dictionary. */ public HashTableDictionaryEncoder(D dictionary) { this(dictionary, false); @@ -71,20 +61,17 @@ public HashTableDictionaryEncoder(D dictionary) { /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding/decoding. - *

- *   <li>
- *     For encoding, when a null is encountered in the input,
- *     1) If the flag is set to true, the encoder searches for the value in the dictionary,
- *     and outputs the index in the dictionary.
- *     2) If the flag is set to false, the encoder simply produces a null in the output.
- *   </li>
- *   <li>
- *     For decoding, when a null is encountered in the input,
- *     1) If the flag is set to true, the decoder should never expect a null in the input.
- *     2) If set to false, the decoder simply produces a null in the output.
- *   </li>
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ *     processing null values in the input during encoding/decoding.
+ *     <li>For encoding, when a null is encountered in the input, 1) If the flag is set to true,
+ *         the encoder searches for the value in the dictionary, and outputs the index in the
+ *         dictionary. 2) If the flag is set to false, the encoder simply produces a null in the
+ *         output.
+ *     <li>For decoding, when a null is encountered in the input, 1) If the flag is set to true,
+ *         the decoder should never expect a null in the input. 2) If set to false, the decoder
+ *         simply produces a null in the output.
 */ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) { this(dictionary, encodeNull, SimpleHasher.INSTANCE); @@ -92,13 +79,13 @@ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) { /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding. - * When a null is encountered in the input, - * 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. + * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for + * processing null values in the input during encoding. When a null is encountered in the + * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary, + * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply + * produces a null in the output. * @param hasher the hasher used to calculate the hash code. */ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull, ArrowBufHasher hasher) { @@ -120,12 +107,12 @@ private void buildHashMap() { } /** - * Encodes an input vector by a hash table. - * So the algorithm takes O(n) time, where n is the length of the input vector. + * Encodes an input vector by a hash table. So the algorithm takes O(n) time, where n is the + * length of the input vector. * - * @param input the input vector. + * @param input the input vector. * @param output the output vector. - **/ + */ @Override public void encode(D input, E output) { for (int i = 0; i < input.getValueCount(); i++) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java index 84a3a96af8ef1..9aeff22005751 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.BaseIntVector; @@ -24,20 +23,17 @@ /** * Dictionary encoder based on linear search. + * * @param <E> encoded vector type. * @param <D> decoded vector type, which is also the dictionary type. */ public class LinearDictionaryEncoder<E extends BaseIntVector, D extends ValueVector> implements DictionaryEncoder<E, D> { - /** - * The dictionary for encoding. - */ + /** The dictionary for encoding. */ private final D dictionary; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. */ private final boolean encodeNull; private RangeEqualsVisitor equalizer; @@ -46,8 +42,10 @@ public class LinearDictionaryEncoder<E extends BaseIntVector, D extends ValueVector> diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java * @param <E> encoded vector type. * @param <D> decoded vector type, which is also the dictionary type. */ public class SearchDictionaryEncoder<E extends BaseIntVector, D extends ValueVector> implements DictionaryEncoder<E, D> { - /** - * The dictionary for encoding/decoding. - * It must be sorted.
- */ + /** The dictionary for encoding/decoding. It must be sorted. */ private final D dictionary; - /** - * The criteria by which the dictionary is sorted. - */ + /** The criteria by which the dictionary is sorted. */ private final VectorValueComparator comparator; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. */ private final boolean encodeNull; /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. It must be in sorted order. * @param comparator the criteria for sorting. */ @@ -57,28 +51,29 @@ public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. It must be in sorted order. * @param comparator the criteria for sorting. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding. - * When a null is encountered in the input, - * 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. + * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for + * processing null values in the input during encoding. When a null is encountered in the + * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary, + * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply + * produces a null in the output. */ - public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator, boolean encodeNull) { + public SearchDictionaryEncoder( + D dictionary, VectorValueComparator comparator, boolean encodeNull) { this.dictionary = dictionary; this.comparator = comparator; this.encodeNull = encodeNull; } /** - * Encodes an input vector by binary search. - * So the algorithm takes O(n * log(m)) time, where n is the length of the input vector, - * and m is the length of the dictionary. + * Encodes an input vector by binary search. So the algorithm takes O(n * log(m)) time, where n is + * the length of the input vector, and m is the length of the dictionary. + * * @param input the input vector. - * @param output the output vector. Note that it must be in a fresh state. At least, - * all its validity bits should be clear. + * @param output the output vector. Note that it must be in a fresh state. At least, all its + * validity bits should be clear. */ @Override public void encode(D input, E output) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java index f9cd77daa2e76..fca7df067dcff 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java @@ -14,45 +14,36 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import java.util.TreeSet; - import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; /** - * This class builds the dictionary based on a binary search tree. 
- * Each add operation can be finished in O(log(n)) time, - * where n is the current dictionary size. + * This class builds the dictionary based on a binary search tree. Each add operation can be + * finished in O(log(n)) time, where n is the current dictionary size. * * @param the dictionary vector type. */ -public class SearchTreeBasedDictionaryBuilder implements DictionaryBuilder { +public class SearchTreeBasedDictionaryBuilder + implements DictionaryBuilder { - /** - * The dictionary to be built. - */ + /** The dictionary to be built. */ private final V dictionary; - /** - * The criteria for sorting in the search tree. - */ + /** The criteria for sorting in the search tree. */ protected final VectorValueComparator comparator; - /** - * If null should be encoded. - */ + /** If null should be encoded. */ private final boolean encodeNull; - /** - * The search tree for storing the value index. - */ + /** The search tree for storing the value index. */ private TreeSet searchTree; /** * Construct a search tree-based dictionary builder. + * * @param dictionary the dictionary vector. * @param comparator the criteria for value equality. */ @@ -62,11 +53,13 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c /** * Construct a search tree-based dictionary builder. + * * @param dictionary the dictionary vector. * @param comparator the criteria for value equality. * @param encodeNull if null values should be added to the dictionary. */ - public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator comparator, boolean encodeNull) { + public SearchTreeBasedDictionaryBuilder( + V dictionary, VectorValueComparator comparator, boolean encodeNull) { this.dictionary = dictionary; this.comparator = comparator; this.encodeNull = encodeNull; @@ -76,11 +69,10 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c } /** - * Gets the dictionary built. - * Please note that the dictionary is not in sorted order. - * Instead, its order is determined by the order of element insertion. - * To get the dictionary in sorted order, please use - * {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}. + * Gets the dictionary built. Please note that the dictionary is not in sorted order. Instead, its + * order is determined by the order of element insertion. To get the dictionary in sorted order, + * please use {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}. + * * @return the dictionary. */ @Override @@ -90,6 +82,7 @@ public V getDictionary() { /** * Try to add all values from the target vector to the dictionary. + * * @param targetVector the target vector containing values to probe. * @return the number of values actually added to the dictionary. */ @@ -107,6 +100,7 @@ public int addValues(V targetVector) { /** * Try to add an element from the target vector to the dictionary. + * * @param targetVector the target vector containing new element. * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. @@ -132,8 +126,8 @@ public int addValue(V targetVector, int targetIndex) { } /** - * Gets the sorted dictionary. - * Note that given the binary search tree, the sort can finish in O(n). + * Gets the sorted dictionary. Note that given the binary search tree, the sort can finish in + * O(n). 
*/ public void populateSortedDictionary(V sortedDictionary) { int idx = 0; diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java index f5e95cf1033f5..5492676af1a2e 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java @@ -14,26 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.misc; import org.apache.arrow.vector.BaseIntVector; -/** - * Partial sum related utilities. - */ +/** Partial sum related utilities. */ public class PartialSumUtils { /** - * Converts an input vector to a partial sum vector. - * This is an inverse operation of {@link PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. - * Suppose we have input vector a and output vector b. - * Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...). + * Converts an input vector to a partial sum vector. This is an inverse operation of {@link + * PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. Suppose we have input vector a + * and output vector b. Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...). + * * @param deltaVector the input vector. * @param partialSumVector the output vector. * @param sumBase the base of the partial sums. */ - public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) { + public static void toPartialSumVector( + BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) { long sum = sumBase; partialSumVector.setWithPossibleTruncate(0, sumBase); @@ -45,10 +43,10 @@ public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector p } /** - * Converts an input vector to the delta vector. - * This is an inverse operation of {@link PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. - * Suppose we have input vector a and output vector b. - * Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...). + * Converts an input vector to the delta vector. This is an inverse operation of {@link + * PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. Suppose we have input + * vector a and output vector b. Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...). + * * @param partialSumVector the input vector. * @param deltaVector the output vector. */ @@ -61,18 +59,18 @@ public static void toDeltaVector(BaseIntVector partialSumVector, BaseIntVector d } /** - * Given a value and a partial sum vector, finds its position in the partial sum vector. - * In particular, given an integer value a and partial sum vector v, we try to find a - * position i, so that v(i) <= a < v(i + 1). - * The algorithm is based on binary search, so it takes O(log(n)) time, where n is - * the length of the partial sum vector. + * Given a value and a partial sum vector, finds its position in the partial sum vector. In + * particular, given an integer value a and partial sum vector v, we try to find a position i, so + * that v(i) <= a < v(i + 1). The algorithm is based on binary search, so it takes O(log(n)) time, + * where n is the length of the partial sum vector. + * * @param partialSumVector the input partial sum vector. * @param value the value to search. 
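The three operations in this file are compact enough to restate on plain arrays. A sketch with long[] standing in for BaseIntVector (illustrative names, not the Arrow API):

    class PartialSumSketch {
      // b(0) = sumBase; b(i + 1) = b(i) + a(i)
      static long[] toPartialSums(long[] deltas, long sumBase) {
        long[] sums = new long[deltas.length + 1];
        sums[0] = sumBase;
        for (int i = 0; i < deltas.length; i++) {
          sums[i + 1] = sums[i] + deltas[i];
        }
        return sums;
      }

      // b(i) = a(i + 1) - a(i); inverse of the transform above
      static long[] toDeltas(long[] partialSums) {
        long[] deltas = new long[partialSums.length - 1];
        for (int i = 0; i < deltas.length; i++) {
          deltas[i] = partialSums[i + 1] - partialSums[i];
        }
        return deltas;
      }

      // Binary search for the position i with sums[i] <= value < sums[i + 1], or -1.
      static int findPosition(long[] sums, long value) {
        if (value < sums[0] || value >= sums[sums.length - 1]) {
          return -1; // out of range, as in the vector version
        }
        int low = 0;
        int high = sums.length - 2;
        while (low < high) {
          int mid = (low + high + 1) >>> 1; // round up so the loop always progresses
          if (sums[mid] <= value) {
            low = mid;
          } else {
            high = mid - 1;
          }
        }
        return low;
      }
    }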
* @return the position in the partial sum vector, if any, or -1, if none is found. */ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, long value) { - if (value < partialSumVector.getValueAsLong(0) || - value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) { + if (value < partialSumVector.getValueAsLong(0) + || value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) { return -1; } @@ -114,6 +112,5 @@ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, throw new IllegalStateException("Should never get here"); } - private PartialSumUtils() { - } + private PartialSumUtils() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java index 43c9a5b010e8c..baa2058ffc51f 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.rank; import java.util.stream.IntStream; - import org.apache.arrow.algorithm.sort.IndexSorter; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -28,21 +26,21 @@ /** * Utility for calculating ranks of vector elements. + * * @param the vector type */ public class VectorRank { private VectorValueComparator comparator; - /** - * Vector indices. - */ + /** Vector indices. */ private IntVector indices; private final BufferAllocator allocator; /** * Constructs a vector rank utility. + * * @param allocator the allocator to use. */ public VectorRank(BufferAllocator allocator) { @@ -50,9 +48,10 @@ public VectorRank(BufferAllocator allocator) { } /** - * Given a rank r, gets the index of the element that is the rth smallest in the vector. - * The operation is performed without changing the vector, and takes O(n) time, - * where n is the length of the vector. + * Given a rank r, gets the index of the element that is the rth smallest in the vector. The + * operation is performed without changing the vector, and takes O(n) time, where n is the length + * of the vector. + * * @param vector the vector from which to get the element index. * @param comparator the criteria for vector element comparison. * @param rank the rank to determine. diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java index 6226921b22ed6..6a48019edc3eb 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java @@ -14,49 +14,40 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.search; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; - import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.compare.Range; import org.apache.arrow.vector.compare.RangeEqualsVisitor; /** - * Search for a value in the vector by multiple threads. - * This is often used in scenarios where the vector is large or - * low response time is required. + * Search for a value in the vector by multiple threads. This is often used in scenarios where the + * vector is large or low response time is required. + * * @param the vector type. */ public class ParallelSearcher { - /** - * The target vector to search. - */ + /** The target vector to search. */ private final V vector; - /** - * The thread pool. - */ + /** The thread pool. */ private final ExecutorService threadPool; - /** - * The number of threads to use. - */ + /** The number of threads to use. */ private final int numThreads; - /** - * The position of the key in the target vector, if any. - */ + /** The position of the key in the target vector, if any. */ private volatile int keyPosition = -1; /** * Constructs a parallel searcher. + * * @param vector the vector to search. * @param threadPool the thread pool to use. * @param numThreads the number of threads to use. @@ -77,17 +68,17 @@ private CompletableFuture[] initSearch() { } /** - * Search for the key in the target vector. The element-wise comparison is based on - * {@link RangeEqualsVisitor}, so there are two possible results for each element-wise - * comparison: equal and un-equal. + * Search for the key in the target vector. The element-wise comparison is based on {@link + * RangeEqualsVisitor}, so there are two possible results for each element-wise comparison: equal + * and un-equal. + * * @param keyVector the vector containing the search key. * @param keyIndex the index of the search key in the key vector. - * @return the position of a matched value in the target vector, - * or -1 if none is found. Please note that if there are multiple - * matches of the key in the target vector, this method makes no - * guarantees about which instance is returned. - * For an alternative search implementation that always finds the first match of the key, - * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. + * @return the position of a matched value in the target vector, or -1 if none is found. Please + * note that if there are multiple matches of the key in the target vector, this method makes + * no guarantees about which instance is returned. For an alternative search implementation + * that always finds the first match of the key, see {@link + * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. * @throws ExecutionException if an exception occurs in a thread. * @throws InterruptedException if a thread is interrupted. 
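The search body that follows splits [0, valueCount) across numThreads tasks. Restated below with the long conversion applied to both bounds; note that in the quoted code the end bound is written `(int) ((long) valueCount) * (tid + 1) / numThreads`, where the cast back to int binds before the multiplication, so unlike `start` it is not actually computed in long and can overflow for very large vectors:

    class RangeSplit {
      // Range assigned to task `tid` of `numThreads` over [0, valueCount):
      // multiply in long, divide, then narrow back to int.
      static int[] taskRange(int valueCount, int numThreads, int tid) {
        int start = (int) ((long) valueCount * tid / numThreads);
        int end = (int) ((long) valueCount * (tid + 1) / numThreads);
        return new int[] {start, end}; // the range is empty when start >= end
      }
    }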
*/ @@ -96,36 +87,38 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup final int valueCount = vector.getValueCount(); for (int i = 0; i < numThreads; i++) { final int tid = i; - Future unused = threadPool.submit(() -> { - // convert to long to avoid overflow - int start = (int) (((long) valueCount) * tid / numThreads); - int end = (int) ((long) valueCount) * (tid + 1) / numThreads; - - if (start >= end) { - // no data assigned to this task. - futures[tid].complete(false); - return; - } - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null); - Range range = new Range(0, 0, 1); - for (int pos = start; pos < end; pos++) { - if (keyPosition != -1) { - // the key has been found by another task - futures[tid].complete(false); - return; - } - range.setLeftStart(pos).setRightStart(keyIndex); - if (visitor.rangeEquals(range)) { - keyPosition = pos; - futures[tid].complete(true); - return; - } - } - - // no match value is found. - futures[tid].complete(false); - }); + Future unused = + threadPool.submit( + () -> { + // convert to long to avoid overflow + int start = (int) (((long) valueCount) * tid / numThreads); + int end = (int) ((long) valueCount) * (tid + 1) / numThreads; + + if (start >= end) { + // no data assigned to this task. + futures[tid].complete(false); + return; + } + + RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null); + Range range = new Range(0, 0, 1); + for (int pos = start; pos < end; pos++) { + if (keyPosition != -1) { + // the key has been found by another task + futures[tid].complete(false); + return; + } + range.setLeftStart(pos).setRightStart(keyIndex); + if (visitor.rangeEquals(range)) { + keyPosition = pos; + futures[tid].complete(true); + return; + } + } + + // no match value is found. + futures[tid].complete(false); + }); } CompletableFuture.allOf(futures).get(); @@ -133,56 +126,58 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup } /** - * Search for the key in the target vector. The element-wise comparison is based on - * {@link VectorValueComparator}, so there are three possible results for each element-wise - * comparison: less than, equal to and greater than. + * Search for the key in the target vector. The element-wise comparison is based on {@link + * VectorValueComparator}, so there are three possible results for each element-wise comparison: + * less than, equal to and greater than. + * * @param keyVector the vector containing the search key. * @param keyIndex the index of the search key in the key vector. * @param comparator the comparator for comparing the key against vector elements. - * @return the position of a matched value in the target vector, - * or -1 if none is found. Please note that if there are multiple - * matches of the key in the target vector, this method makes no - * guarantees about which instance is returned. - * For an alternative search implementation that always finds the first match of the key, - * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. + * @return the position of a matched value in the target vector, or -1 if none is found. Please + * note that if there are multiple matches of the key in the target vector, this method makes + * no guarantees about which instance is returned. For an alternative search implementation + * that always finds the first match of the key, see {@link + * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. 
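Both overloads coordinate their workers the same way: a volatile field doubles as found-flag and result slot, and every worker polls it, so the whole pool stops soon after any thread publishes a hit. A minimal sketch of that pattern with hypothetical names and plain int[] data:

    class EarlyExitScan {
      private volatile int keyPosition = -1; // written by the finding worker, read by all

      // Scan [start, end) for key, bailing out once any worker has published a result.
      void scan(int[] data, int key, int start, int end) {
        for (int pos = start; pos < end && keyPosition == -1; pos++) {
          if (data[pos] == key) {
            keyPosition = pos; // publish; other workers observe this and stop
            return;
          }
        }
      }
    }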
* @throws ExecutionException if an exception occurs in a thread. * @throws InterruptedException if a thread is interrupted. */ - public int search( - V keyVector, int keyIndex, VectorValueComparator comparator) throws ExecutionException, InterruptedException { + public int search(V keyVector, int keyIndex, VectorValueComparator comparator) + throws ExecutionException, InterruptedException { final CompletableFuture[] futures = initSearch(); final int valueCount = vector.getValueCount(); for (int i = 0; i < numThreads; i++) { final int tid = i; - Future unused = threadPool.submit(() -> { - // convert to long to avoid overflow - int start = (int) (((long) valueCount) * tid / numThreads); - int end = (int) ((long) valueCount) * (tid + 1) / numThreads; - - if (start >= end) { - // no data assigned to this task. - futures[tid].complete(false); - return; - } - - VectorValueComparator localComparator = comparator.createNew(); - localComparator.attachVectors(vector, keyVector); - for (int pos = start; pos < end; pos++) { - if (keyPosition != -1) { - // the key has been found by another task - futures[tid].complete(false); - return; - } - if (localComparator.compare(pos, keyIndex) == 0) { - keyPosition = pos; - futures[tid].complete(true); - return; - } - } - - // no match value is found. - futures[tid].complete(false); - }); + Future unused = + threadPool.submit( + () -> { + // convert to long to avoid overflow + int start = (int) (((long) valueCount) * tid / numThreads); + int end = (int) ((long) valueCount) * (tid + 1) / numThreads; + + if (start >= end) { + // no data assigned to this task. + futures[tid].complete(false); + return; + } + + VectorValueComparator localComparator = comparator.createNew(); + localComparator.attachVectors(vector, keyVector); + for (int pos = start; pos < end; pos++) { + if (keyPosition != -1) { + // the key has been found by another task + futures[tid].complete(false); + return; + } + if (localComparator.compare(pos, keyIndex) == 0) { + keyPosition = pos; + futures[tid].complete(true); + return; + } + } + + // no match value is found. + futures[tid].complete(false); + }); } CompletableFuture.allOf(futures).get(); diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java index 249194843f101..c7905dd8956c8 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java @@ -1,108 +1,105 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.arrow.algorithm.search; - -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.vector.ValueVector; - -/** - * Search for the range of a particular element in the target vector. - */ -public class VectorRangeSearcher { - - /** - * Result returned when a search fails. - */ - public static final int SEARCH_FAIL_RESULT = -1; - - /** - * Search for the first occurrence of an element. - * The search is based on the binary search algorithm. So the target vector must be sorted. - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for the comparison. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. - * @return the index of the first matched element if any, and -1 otherwise. - */ - public static int getFirstMatch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - int ret = SEARCH_FAIL_RESULT; - - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int result = comparator.compare(keyIndex, mid); - if (result < 0) { - // the key is smaller - high = mid - 1; - } else if (result > 0) { - // the key is larger - low = mid + 1; - } else { - // an equal element is found - // continue to go left-ward - ret = mid; - high = mid - 1; - } - } - return ret; - } - - /** - * Search for the last occurrence of an element. - * The search is based on the binary search algorithm. So the target vector must be sorted. - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for the comparison. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. - * @return the index of the last matched element if any, and -1 otherwise. - */ - public static int getLastMatch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - int ret = SEARCH_FAIL_RESULT; - - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int result = comparator.compare(keyIndex, mid); - if (result < 0) { - // the key is smaller - high = mid - 1; - } else if (result > 0) { - // the key is larger - low = mid + 1; - } else { - // an equal element is found, - // continue to go right-ward - ret = mid; - low = mid + 1; - } - } - return ret; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.algorithm.search; + +import org.apache.arrow.algorithm.sort.VectorValueComparator; +import org.apache.arrow.vector.ValueVector; + +/** Search for the range of a particular element in the target vector. */ +public class VectorRangeSearcher { + + /** Result returned when a search fails. */ + public static final int SEARCH_FAIL_RESULT = -1; + + /** + * Search for the first occurrence of an element. The search is based on the binary search + * algorithm. So the target vector must be sorted. + * + * @param targetVector the vector from which to perform the search. + * @param comparator the criterion for the comparison. + * @param keyVector the vector containing the element to search. + * @param keyIndex the index of the search key in the key vector. + * @param the vector type. + * @return the index of the first matched element if any, and -1 otherwise. + */ + public static int getFirstMatch( + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + comparator.attachVectors(keyVector, targetVector); + + int ret = SEARCH_FAIL_RESULT; + + int low = 0; + int high = targetVector.getValueCount() - 1; + + while (low <= high) { + int mid = low + (high - low) / 2; + int result = comparator.compare(keyIndex, mid); + if (result < 0) { + // the key is smaller + high = mid - 1; + } else if (result > 0) { + // the key is larger + low = mid + 1; + } else { + // an equal element is found + // continue to go left-ward + ret = mid; + high = mid - 1; + } + } + return ret; + } + + /** + * Search for the last occurrence of an element. The search is based on the binary search + * algorithm. So the target vector must be sorted. + * + * @param targetVector the vector from which to perform the search. + * @param comparator the criterion for the comparison. + * @param keyVector the vector containing the element to search. + * @param keyIndex the index of the search key in the key vector. + * @param the vector type. + * @return the index of the last matched element if any, and -1 otherwise. + */ + public static int getLastMatch( + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + comparator.attachVectors(keyVector, targetVector); + + int ret = SEARCH_FAIL_RESULT; + + int low = 0; + int high = targetVector.getValueCount() - 1; + + while (low <= high) { + int mid = low + (high - low) / 2; + int result = comparator.compare(keyIndex, mid); + if (result < 0) { + // the key is smaller + high = mid - 1; + } else if (result > 0) { + // the key is larger + low = mid + 1; + } else { + // an equal element is found, + // continue to go right-ward + ret = mid; + low = mid + 1; + } + } + return ret; + } +} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java index 646bca01bb81d..dd0b4de5d8677 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java @@ -14,25 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; -/** - * Search for a particular element in the vector. - */ +/** Search for a particular element in the vector. 
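The two range-search methods above differ only in how they treat equality: getFirstMatch records the hit and keeps narrowing left-ward, getLastMatch narrows right-ward. On a plain sorted int[] the same pair looks like this (illustrative, not the Arrow API):

    class RangeMatchSketch {
      static int firstMatch(int[] sorted, int key) {
        int ret = -1;
        int low = 0;
        int high = sorted.length - 1;
        while (low <= high) {
          int mid = low + (high - low) / 2;
          if (key < sorted[mid]) {
            high = mid - 1;
          } else if (key > sorted[mid]) {
            low = mid + 1;
          } else {
            ret = mid; // remember the hit,
            high = mid - 1; // then continue left-ward
          }
        }
        return ret;
      }

      static int lastMatch(int[] sorted, int key) {
        int ret = -1;
        int low = 0;
        int high = sorted.length - 1;
        while (low <= high) {
          int mid = low + (high - low) / 2;
          if (key < sorted[mid]) {
            high = mid - 1;
          } else if (key > sorted[mid]) {
            low = mid + 1;
          } else {
            ret = mid; // remember the hit,
            low = mid + 1; // then continue right-ward
          }
        }
        return ret;
      }
    }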
*/ public final class VectorSearcher { - /** - * Result returned when a search fails. - */ + /** Result returned when a search fails. */ public static final int SEARCH_FAIL_RESULT = -1; /** - * Search for a particular element from the key vector in the target vector by binary search. - * The target vector must be sorted. + * Search for a particular element from the key vector in the target vector by binary search. The + * target vector must be sorted. + * * @param targetVector the vector from which to perform the sort. * @param comparator the criterion for the sort. * @param keyVector the vector containing the element to search. @@ -41,7 +37,7 @@ public final class VectorSearcher { * @return the index of a matched element if any, and -1 otherwise. */ public static int binarySearch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { comparator.attachVectors(keyVector, targetVector); // perform binary search @@ -63,7 +59,9 @@ public static int binarySearch( } /** - * Search for a particular element from the key vector in the target vector by traversing the vector in sequence. + * Search for a particular element from the key vector in the target vector by traversing the + * vector in sequence. + * * @param targetVector the vector from which to perform the search. * @param comparator the criterion for element equality. * @param keyVector the vector containing the element to search. @@ -72,7 +70,7 @@ public static int binarySearch( * @return the index of a matched element if any, and -1 otherwise. */ public static int linearSearch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { comparator.attachVectors(keyVector, targetVector); for (int i = 0; i < targetVector.getValueCount(); i++) { if (comparator.compare(keyIndex, i) == 0) { @@ -82,7 +80,5 @@ public static int linearSearch( return SEARCH_FAIL_RESULT; } - private VectorSearcher() { - - } + private VectorSearcher() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java index ec74598e0eebf..77093d87bc489 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java @@ -14,20 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * A composite vector comparator compares a number of vectors - * by a number of inner comparators. - *
<p>
- * It works by first using the first comparator, if a non-zero value - * is returned, it simply returns it. Otherwise, it uses the second comparator, - * and so on, until a non-zero value is produced, or all inner comparators have - * been used. - * </p>
+ * A composite vector comparator compares a number of vectors by a number of inner comparators. + * + * <p>
    It works by first using the first comparator, if a non-zero value is returned, it simply + * returns it. Otherwise, it uses the second comparator, and so on, until a non-zero value is + * produced, or all inner comparators have been used. */ public class CompositeVectorComparator extends VectorValueComparator { @@ -62,7 +58,8 @@ public int compare(int index1, int index2) { @Override public VectorValueComparator createNew() { - VectorValueComparator[] newInnerComparators = new VectorValueComparator[innerComparators.length]; + VectorValueComparator[] newInnerComparators = + new VectorValueComparator[innerComparators.length]; for (int i = 0; i < innerComparators.length; i++) { newInnerComparators[i] = innerComparators[i].createNew(); } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java index 588876aa99059..ec650cd9dc88b 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; import java.math.BigDecimal; import java.time.Duration; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -56,13 +54,12 @@ import org.apache.arrow.vector.complex.RepeatedValueVector; import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; -/** - * Default comparator implementations for different types of vectors. - */ +/** Default comparator implementations for different types of vectors. */ public class DefaultVectorComparators { /** * Create the default comparator for the vector. + * * @param vector the vector. * @param the vector type. * @return the default comparator. 
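A hedged usage sketch of the factory documented above, assuming the standard RootAllocator/IntVector setup from arrow-memory and arrow-vector (the wiring itself is not part of this patch):

    import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
    import org.apache.arrow.algorithm.sort.VectorValueComparator;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;

    class DefaultComparatorExample {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
            IntVector vec = new IntVector("v", allocator)) {
          vec.allocateNew(3);
          vec.set(0, 30);
          vec.set(1, 10);
          vec.set(2, 20);
          vec.setValueCount(3);
          VectorValueComparator<IntVector> cmp =
              DefaultVectorComparators.createDefaultComparator(vec);
          cmp.attachVector(vec); // compare values of the same vector by index
          System.out.println(cmp.compare(1, 2) < 0); // 10 < 20, so prints true
        }
      }
    }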
@@ -104,7 +101,8 @@ public static VectorValueComparator createDefaultComp } else if (vector instanceof IntervalDayVector) { return (VectorValueComparator) new IntervalDayComparator(); } else if (vector instanceof IntervalMonthDayNanoVector) { - throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + throw new IllegalArgumentException( + "No default comparator for " + vector.getClass().getCanonicalName()); } else if (vector instanceof TimeMicroVector) { return (VectorValueComparator) new TimeMicroComparator(); } else if (vector instanceof TimeMilliVector) { @@ -122,7 +120,7 @@ public static VectorValueComparator createDefaultComp return (VectorValueComparator) new VariableWidthComparator(); } else if (vector instanceof RepeatedValueVector) { VectorValueComparator innerComparator = - createDefaultComparator(((RepeatedValueVector) vector).getDataVector()); + createDefaultComparator(((RepeatedValueVector) vector).getDataVector()); return new RepeatedValueComparator(innerComparator); } else if (vector instanceof FixedSizeListVector) { VectorValueComparator innerComparator = @@ -132,13 +130,11 @@ public static VectorValueComparator createDefaultComp return (VectorValueComparator) new NullComparator(); } - throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + throw new IllegalArgumentException( + "No default comparator for " + vector.getClass().getCanonicalName()); } - /** - * Default comparator for bytes. - * The comparison is based on values, with null comes first. - */ + /** Default comparator for bytes. The comparison is based on values, with null comes first. */ public static class ByteComparator extends VectorValueComparator { public ByteComparator() { @@ -159,8 +155,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for short integers. - * The comparison is based on values, with null comes first. + * Default comparator for short integers. The comparison is based on values, with null comes + * first. */ public static class ShortComparator extends VectorValueComparator { @@ -182,8 +178,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for 32-bit integers. - * The comparison is based on int values, with null comes first. + * Default comparator for 32-bit integers. The comparison is based on int values, with null comes + * first. */ public static class IntComparator extends VectorValueComparator { @@ -205,8 +201,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for long integers. - * The comparison is based on values, with null comes first. + * Default comparator for long integers. The comparison is based on values, with null comes first. */ public static class LongComparator extends VectorValueComparator { @@ -229,8 +224,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned bytes. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned bytes. The comparison is based on values, with null comes + * first. */ public static class UInt1Comparator extends VectorValueComparator { @@ -253,8 +248,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned short integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned short integer. The comparison is based on values, with null + * comes first. 
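The comparator bodies are elided from this hunk; for the unsigned families the standard technique is to widen and mask, so that a wider signed comparison agrees with unsigned order. A sketch on primitives under that assumption (UInt8 needs the JDK helper, since no wider primitive exists):

    class UnsignedCompareSketch {
      static int compareUnsignedByte(byte a, byte b) {
        return (a & 0xff) - (b & 0xff); // both operands land in [0, 255]
      }

      static int compareUnsignedInt(int a, int b) {
        return Long.compare(a & 0xffffffffL, b & 0xffffffffL); // widen to long, then compare
      }

      static int compareUnsignedLong(long a, long b) {
        return Long.compareUnsigned(a, b); // no wider primitive for 64-bit values
      }
    }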
*/ public static class UInt2Comparator extends VectorValueComparator { @@ -280,8 +275,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned integer. The comparison is based on values, with null comes + * first. */ public static class UInt4Comparator extends VectorValueComparator { @@ -303,8 +298,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned long integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned long integer. The comparison is based on values, with null + * comes first. */ public static class UInt8Comparator extends VectorValueComparator { @@ -326,8 +321,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for float type. - * The comparison is based on values, with null comes first. + * Default comparator for float type. The comparison is based on values, with null comes first. */ public static class Float4Comparator extends VectorValueComparator { @@ -363,8 +357,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for double type. - * The comparison is based on values, with null comes first. + * Default comparator for double type. The comparison is based on values, with null comes first. */ public static class Float8Comparator extends VectorValueComparator { @@ -399,10 +392,7 @@ public VectorValueComparator createNew() { } } - /** - * Default comparator for bit type. - * The comparison is based on values, with null comes first. - */ + /** Default comparator for bit type. The comparison is based on values, with null comes first. */ public static class BitComparator extends VectorValueComparator { public BitComparator() { @@ -424,8 +414,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for DateDay type. - * The comparison is based on values, with null comes first. + * Default comparator for DateDay type. The comparison is based on values, with null comes first. */ public static class DateDayComparator extends VectorValueComparator { @@ -447,8 +436,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for DateMilli type. - * The comparison is based on values, with null comes first. + * Default comparator for DateMilli type. The comparison is based on values, with null comes + * first. */ public static class DateMilliComparator extends VectorValueComparator { @@ -471,8 +460,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Decimal256 type. - * The comparison is based on values, with null comes first. + * Default comparator for Decimal256 type. The comparison is based on values, with null comes + * first. */ public static class Decimal256Comparator extends VectorValueComparator { @@ -495,8 +484,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Decimal type. - * The comparison is based on values, with null comes first. + * Default comparator for Decimal type. The comparison is based on values, with null comes first. */ public static class DecimalComparator extends VectorValueComparator { @@ -519,8 +507,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Duration type. - * The comparison is based on values, with null comes first. + * Default comparator for Duration type. The comparison is based on values, with null comes first. 
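Every comparator in this file repeats the same ordering contract, which reduces to three cases: both null (equal), exactly one null (the null side sorts first), neither null (defer to the value comparison). As a plain sketch:

    class NullOrderSketch {
      static int compareNullable(Integer a, Integer b) {
        boolean aNull = a == null;
        boolean bNull = b == null;
        if (aNull && bNull) {
          return 0; // two nulls compare as equal
        }
        if (aNull || bNull) {
          return aNull ? -1 : 1; // the null side sorts first
        }
        return Integer.compare(a, b); // ordinary value comparison
      }
    }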
*/ public static class DurationComparator extends VectorValueComparator { @@ -543,8 +530,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for IntervalDay type. - * The comparison is based on values, with null comes first. + * Default comparator for IntervalDay type. The comparison is based on values, with null comes + * first. */ public static class IntervalDayComparator extends VectorValueComparator { @@ -567,8 +554,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeMicro type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeMicro type. The comparison is based on values, with null comes + * first. */ public static class TimeMicroComparator extends VectorValueComparator { @@ -591,8 +578,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeMilli type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeMilli type. The comparison is based on values, with null comes + * first. */ public static class TimeMilliComparator extends VectorValueComparator { @@ -615,8 +602,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeNano type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeNano type. The comparison is based on values, with null comes first. */ public static class TimeNanoComparator extends VectorValueComparator { @@ -639,8 +625,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeSec type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeSec type. The comparison is based on values, with null comes first. */ public static class TimeSecComparator extends VectorValueComparator { @@ -663,8 +648,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeSec type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeSec type. The comparison is based on values, with null comes first. */ public static class TimeStampComparator extends VectorValueComparator { @@ -687,10 +671,11 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. - * The comparison is in lexicographic order, with null comes first. + * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. The comparison is + * in lexicographic order, with null comes first. */ - public static class FixedSizeBinaryComparator extends VectorValueComparator { + public static class FixedSizeBinaryComparator + extends VectorValueComparator { @Override public int compare(int index1, int index2) { @@ -720,9 +705,7 @@ public VectorValueComparator createNew() { } } - /** - * Default comparator for {@link org.apache.arrow.vector.NullVector}. - */ + /** Default comparator for {@link org.apache.arrow.vector.NullVector}. */ public static class NullComparator extends VectorValueComparator { @Override public int compare(int index1, int index2) { @@ -742,8 +725,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. - * The comparison is in lexicographic order, with null comes first. + * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. The comparison is + * in lexicographic order, with null comes first. 
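For the variable-width and binary comparators, "lexicographic order" means unsigned byte-wise comparison, with a shorter value ordering before any longer value it prefixes. A plain byte[] sketch (the Arrow implementation presumably performs the equivalent walk over ArrowBuf memory via the ByteFunctionHelpers import shown above):

    class LexCompareSketch {
      static int compareLexicographic(byte[] a, byte[] b) {
        int n = Math.min(a.length, b.length);
        for (int i = 0; i < n; i++) {
          int cmp = (a[i] & 0xff) - (b[i] & 0xff); // unsigned byte order
          if (cmp != 0) {
            return cmp;
          }
        }
        return a.length - b.length; // the shorter value sorts first on a shared prefix
      }
    }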
*/ public static class VariableWidthComparator extends VectorValueComparator { @@ -772,12 +755,13 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link RepeatedValueVector}. - * It works by comparing the underlying vector in a lexicographic order. + * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector + * in a lexicographic order. + * * @param inner vector type. */ public static class RepeatedValueComparator - extends VectorValueComparator { + extends VectorValueComparator { private final VectorValueComparator innerComparator; @@ -823,8 +807,9 @@ public void attachVectors(RepeatedValueVector vector1, RepeatedValueVector vecto } /** - * Default comparator for {@link RepeatedValueVector}. - * It works by comparing the underlying vector in a lexicographic order. + * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector + * in a lexicographic order. + * * @param inner vector type. */ public static class FixedSizeListComparator @@ -869,6 +854,5 @@ public void attachVectors(FixedSizeListVector vector1, FixedSizeListVector vecto } } - private DefaultVectorComparators() { - } + private DefaultVectorComparators() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java index aaa7ba117c3ba..ea2b344a1eabb 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java @@ -14,20 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.BaseFixedWidthVector; /** - * Default in-place sorter for fixed-width vectors. - * It is based on quick-sort, with average time complexity O(n*log(n)). + * Default in-place sorter for fixed-width vectors. It is based on quick-sort, with average time + * complexity O(n*log(n)). + * * @param vector type. */ -public class FixedWidthInPlaceVectorSorter implements InPlaceVectorSorter { +public class FixedWidthInPlaceVectorSorter + implements InPlaceVectorSorter { /** - * If the number of items is smaller than this threshold, we will use another algorithm to sort the data. + * If the number of items is smaller than this threshold, we will use another algorithm to sort + * the data. */ public static final int CHANGE_ALGORITHM_THRESHOLD = 15; @@ -35,15 +37,10 @@ public class FixedWidthInPlaceVectorSorter imple VectorValueComparator comparator; - /** - * The vector to sort. - */ + /** The vector to sort. */ V vec; - /** - * The buffer to hold the pivot. - * It always has length 1. - */ + /** The buffer to hold the pivot. It always has length 1. */ V pivotBuffer; @Override @@ -99,9 +96,7 @@ private void quickSort() { } } - /** - * Select the pivot as the median of 3 samples. - */ + /** Select the pivot as the median of 3 samples. 
*/ void choosePivot(int low, int high) { // we need at least 3 items if (high - low + 1 < STOP_CHOOSING_PIVOT_THRESHOLD) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java index 05a4585792dc2..817e890a5abe1 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.ArrowBuf; @@ -26,18 +25,21 @@ import org.apache.arrow.vector.IntVector; /** - * Default out-of-place sorter for fixed-width vectors. - * It is an out-of-place sort, with time complexity O(n*log(n)). + * Default out-of-place sorter for fixed-width vectors. It is an out-of-place sort, with time + * complexity O(n*log(n)). + * * @param vector type. */ -public class FixedWidthOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter { +public class FixedWidthOutOfPlaceVectorSorter + implements OutOfPlaceVectorSorter { protected IndexSorter indexSorter = new IndexSorter<>(); @Override public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { if (srcVector instanceof BitVector) { - throw new IllegalArgumentException("BitVector is not supported with FixedWidthOutOfPlaceVectorSorter."); + throw new IllegalArgumentException( + "BitVector is not supported with FixedWidthOutOfPlaceVectorSorter."); } comparator.attachVector(srcVector); @@ -49,15 +51,18 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co ArrowBuf dstValueBuffer = dstVector.getDataBuffer(); // check buffer size - Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), - "Not enough capacity for the validity buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity()); + Preconditions.checkArgument( + dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), + "Not enough capacity for the validity buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 7) / 8, + dstValidityBuffer.capacity()); Preconditions.checkArgument( dstValueBuffer.capacity() >= srcVector.getValueCount() * ((long) srcVector.getTypeWidth()), - "Not enough capacity for the data buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - srcVector.getValueCount() * srcVector.getTypeWidth(), dstValueBuffer.capacity()); + "Not enough capacity for the data buffer of the dst vector. 
" + + "Expected capacity %s, actual capacity %s", + srcVector.getValueCount() * srcVector.getTypeWidth(), + dstValueBuffer.capacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { @@ -73,9 +78,9 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co } else { BitVectorHelper.setBit(dstValidityBuffer, dstIndex); MemoryUtil.UNSAFE.copyMemory( - srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth), - dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth), - valueWidth); + srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth), + dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth), + valueWidth); } } } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java index 9ea39f638aebe..18f5e94314f83 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.util.Preconditions; @@ -22,23 +21,26 @@ import org.apache.arrow.vector.ValueVector; /** - * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). - * Since it does not make any assumptions about the memory layout of the vector, its performance - * can be sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), - * it should be used in preference. + * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). Since it + * does not make any assumptions about the memory layout of the vector, its performance can be + * sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), it + * should be used in preference. * * @param vector type. */ -public class GeneralOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter { +public class GeneralOutOfPlaceVectorSorter + implements OutOfPlaceVectorSorter { @Override public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { comparator.attachVector(srcVector); // check vector capacity - Preconditions.checkArgument(dstVector.getValueCapacity() >= srcVector.getValueCount(), - "Not enough capacity for the target vector. " + - "Expected capacity %s, actual capacity %s", srcVector.getValueCount(), dstVector.getValueCapacity()); + Preconditions.checkArgument( + dstVector.getValueCapacity() >= srcVector.getValueCount(), + "Not enough capacity for the target vector. 
" + "Expected capacity %s, actual capacity %s", + srcVector.getValueCount(), + dstVector.getValueCapacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java index 19817fe76b8ec..ba41bb9e4eac7 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java @@ -14,15 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Basic interface for sorting a vector in-place. - * That is, the sorting is performed by modifying the input vector, - * without creating a new sorted vector. + * Basic interface for sorting a vector in-place. That is, the sorting is performed by modifying the + * input vector, without creating a new sorted vector. * * @param the vector type. */ @@ -30,6 +28,7 @@ public interface InPlaceVectorSorter { /** * Sort a vector in-place. + * * @param vec the vector to sort. * @param comparator the criteria for sort. */ diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java index 3072717f43123..b8ce3289d2889 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java @@ -14,39 +14,35 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.stream.IntStream; - import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; /** * Sorter for the indices of a vector. + * * @param vector type. */ public class IndexSorter { /** - * If the number of items is smaller than this threshold, we will use another algorithm to sort the data. + * If the number of items is smaller than this threshold, we will use another algorithm to sort + * the data. */ public static final int CHANGE_ALGORITHM_THRESHOLD = 15; - /** - * Comparator for vector indices. - */ + /** Comparator for vector indices. */ private VectorValueComparator comparator; - /** - * Vector indices to sort. - */ + /** Vector indices to sort. */ private IntVector indices; /** - * Sorts indices, by quick-sort. Suppose the vector is denoted by v. - * After calling this method, the following relations hold: - * v(indices[0]) <= v(indices[1]) <= ... + * Sorts indices, by quick-sort. Suppose the vector is denoted by v. After calling this method, + * the following relations hold: v(indices[0]) <= v(indices[1]) <= ... + * * @param vector the vector whose indices need to be sorted. * @param indices the vector for storing the sorted indices. * @param comparator the comparator to sort indices. @@ -100,11 +96,9 @@ private void quickSort() { } } - /** - * Select the pivot as the median of 3 samples. - */ + /** Select the pivot as the median of 3 samples. 
*/ static int choosePivot( - int low, int high, IntVector indices, VectorValueComparator comparator) { + int low, int high, IntVector indices, VectorValueComparator comparator) { // we need at least 3 items if (high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD) { return indices.get(low); @@ -149,8 +143,9 @@ static int choosePivot( /** * Partition a range of values in a vector into two parts, with elements in one part smaller than - * elements from the other part. The partition is based on the element indices, so it does - * not modify the underlying vector. + * elements from the other part. The partition is based on the element indices, so it does not + * modify the underlying vector. + * * @param low the lower bound of the range. * @param high the upper bound of the range. * @param indices vector element indices. @@ -159,7 +154,7 @@ static int choosePivot( * @return the index of the split point. */ public static int partition( - int low, int high, IntVector indices, VectorValueComparator comparator) { + int low, int high, IntVector indices, VectorValueComparator comparator) { int pivotIndex = choosePivot(low, high, indices, comparator); while (low < high) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java index dc12a5fefdb65..c058636d66d1e 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java @@ -14,27 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; -/** - * Insertion sorter. - */ +/** Insertion sorter. */ class InsertionSorter { /** * Sorts the range of a vector by insertion sort. * - * @param vector the vector to be sorted. - * @param startIdx the start index of the range (inclusive). - * @param endIdx the end index of the range (inclusive). - * @param buffer an extra buffer with capacity 1 to hold the current key. + * @param vector the vector to be sorted. + * @param startIdx the start index of the range (inclusive). + * @param endIdx the end index of the range (inclusive). + * @param buffer an extra buffer with capacity 1 to hold the current key. * @param comparator the criteria for vector element comparison. - * @param the vector type. + * @param the vector type. */ static void insertionSort( V vector, int startIdx, int endIdx, VectorValueComparator comparator, V buffer) { @@ -53,11 +50,11 @@ static void insertionSort( /** * Sorts the range of vector indices by insertion sort. * - * @param indices the vector indices. - * @param startIdx the start index of the range (inclusive). - * @param endIdx the end index of the range (inclusive). + * @param indices the vector indices. + * @param startIdx the start index of the range (inclusive). + * @param endIdx the end index of the range (inclusive). * @param comparator the criteria for vector element comparison. - * @param the vector type. + * @param the vector type. 
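Both insertionSort variants in this file follow the textbook shape described by the javadoc above; restated on a plain int[] with the same inclusive bounds:

    class InsertionSortSketch {
      static void insertionSort(int[] a, int startIdx, int endIdx) {
        for (int i = startIdx + 1; i <= endIdx; i++) {
          int key = a[i]; // element being inserted (the Arrow version parks it in `buffer`)
          int j = i - 1;
          while (j >= startIdx && a[j] > key) {
            a[j + 1] = a[j]; // shift greater elements one slot right
            j--;
          }
          a[j + 1] = key;
        }
      }
    }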
*/ static void insertionSort( IntVector indices, int startIdx, int endIdx, VectorValueComparator comparator) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java index df96121f1f8f7..ccb7bea4e2bd3 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java @@ -14,15 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.IntVector; -/** - * An off heap implementation of stack with int elements. - */ +/** An off heap implementation of stack with int elements. */ class OffHeapIntStack implements AutoCloseable { private static final int INIT_SIZE = 128; diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java index 41d6dadc49147..b18e9b35d0895 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java @@ -14,21 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Basic interface for sorting a vector out-of-place. - * That is, the sorting is performed on a newly-created vector, - * and the original vector is not modified. + * Basic interface for sorting a vector out-of-place. That is, the sorting is performed on a + * newly-created vector, and the original vector is not modified. + * * @param the vector type. */ public interface OutOfPlaceVectorSorter { /** * Sort a vector out-of-place. + * * @param inVec the input vector. * @param outVec the output vector, which has the same size as the input vector. * @param comparator the criteria for sort. diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java index 0b0c3bd55b271..3fcfa5f8f215c 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java @@ -14,17 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.ValueVector; /** - * Stable sorter. It compares values like ordinary comparators. - * However, when values are equal, it breaks ties by the value indices. - * Therefore, sort algorithms using this comparator always produce + * Stable sorter. It compares values like ordinary comparators. However, when values are equal, it + * breaks ties by the value indices. Therefore, sort algorithms using this comparator always produce * stable sort results. + * * @param type of the vector. 
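The stabilizing rule described above fits in one line of logic: consult the inner comparator first, and only on a tie fall back to the positions themselves, which are distinct by construction. A sketch (the Comparator here stands in for a value comparison by index):

    class StableCompareSketch {
      static int stableCompare(int index1, int index2, java.util.Comparator<Integer> inner) {
        int result = inner.compare(index1, index2);
        return result != 0 ? result : Integer.compare(index1, index2); // break ties by index
      }
    }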
*/ public class StableVectorComparator<V extends ValueVector> extends VectorValueComparator<V> { @@ -33,6 +32,7 @@ public class StableVectorComparator<V extends ValueVector> extends VectorValueCo /** * Constructs a stable comparator from a given comparator. + * * @param innerComparator the comparator to convert to a stable comparator. */ public StableVectorComparator(VectorValueComparator<V> innerComparator) { @@ -47,8 +47,9 @@ public void attachVector(V vector) { @Override public void attachVectors(V vector1, V vector2) { - Preconditions.checkArgument(vector1 == vector2, - "Stable comparator only supports comparing values from the same vector"); + Preconditions.checkArgument( + vector1 == vector2, + "Stable comparator only supports comparing values from the same vector"); super.attachVectors(vector1, vector2); innerComparator.attachVectors(vector1, vector2); } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java index 863b07c348ef2..8f58dc0dcee0f 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.ArrowBuf; @@ -25,12 +24,13 @@ import org.apache.arrow.vector.IntVector; /** - * Default sorter for variable-width vectors. - * It is an out-of-place sort, with time complexity O(n*log(n)). + * Default sorter for variable-width vectors. It is an out-of-place sort, with time complexity + * O(n*log(n)). + * * @param <V> vector type. */ public class VariableWidthOutOfPlaceVectorSorter<V extends BaseVariableWidthVector> - implements OutOfPlaceVectorSorter<V> { + implements OutOfPlaceVectorSorter<V> { protected IndexSorter<V> indexSorter = new IndexSorter<>(); @@ -46,20 +46,29 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator<V> co ArrowBuf dstOffsetBuffer = dstVector.getOffsetBuffer(); // check buffer size - Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), - "Not enough capacity for the validity buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity()); Preconditions.checkArgument( - dstOffsetBuffer.capacity() >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), - "Not enough capacity for the offset buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffsetBuffer.capacity()); - long dataSize = srcVector.getOffsetBuffer().getInt( - srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); + dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), + "Not enough capacity for the validity buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 7) / 8, + dstValidityBuffer.capacity()); + Preconditions.checkArgument( + dstOffsetBuffer.capacity() + >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), "Not enough capacity for the offset buffer of the dst vector. 
" + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, + dstOffsetBuffer.capacity()); + long dataSize = + srcVector + .getOffsetBuffer() + .getInt(srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); Preconditions.checkArgument( - dstValueBuffer.capacity() >= dataSize, "No enough capacity for the data buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", dataSize, dstValueBuffer.capacity()); + dstValueBuffer.capacity() >= dataSize, + "No enough capacity for the data buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + dataSize, + dstValueBuffer.capacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { @@ -77,16 +86,19 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex); } else { BitVectorHelper.setBit(dstValidityBuffer, dstIndex); - int srcOffset = srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); + int srcOffset = + srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); int valueLength = - srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) - srcOffset; + srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) + - srcOffset; MemoryUtil.UNSAFE.copyMemory( - srcValueBuffer.memoryAddress() + srcOffset, - dstValueBuffer.memoryAddress() + dstOffset, - valueLength); + srcValueBuffer.memoryAddress() + srcOffset, + dstValueBuffer.memoryAddress() + dstOffset, + valueLength); dstOffset += valueLength; } - dstOffsetBuffer.setInt((dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset); + dstOffsetBuffer.setInt( + (dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset); } } } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java index d2c772ca8a819..0472f04109b1c 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java @@ -14,54 +14,44 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Compare two values at the given indices in the vectors. - * This is used for vector sorting. + * Compare two values at the given indices in the vectors. This is used for vector sorting. + * * @param type of the vector. */ public abstract class VectorValueComparator { - /** - * The first vector to compare. - */ + /** The first vector to compare. */ protected V vector1; - /** - * The second vector to compare. - */ + /** The second vector to compare. */ protected V vector2; - /** - * Width of the vector value. For variable-length vectors, this value makes no sense. - */ + /** Width of the vector value. For variable-length vectors, this value makes no sense. */ protected int valueWidth; - private boolean checkNullsOnCompare = true; /** - * This value is true by default and re-computed when vectors are attached to the comparator. 
If both vectors cannot - * contain nulls then this value is {@code false} and calls to {@code compare(i1, i2)} are short-circuited - * to {@code compareNotNull(i1, i2)} thereby speeding up comparisons resulting in faster sorts etc. + * This value is true by default and re-computed when vectors are attached to the comparator. If + * both vectors cannot contain nulls then this value is {@code false} and calls to {@code + * compare(i1, i2)} are short-circuited to {@code compareNotNull(i1, i2)} thereby speeding up + * comparisons resulting in faster sorts etc. */ public boolean checkNullsOnCompare() { return this.checkNullsOnCompare; } - /** - * Constructor for variable-width vectors. - */ - protected VectorValueComparator() { - - } + /** Constructor for variable-width vectors. */ + protected VectorValueComparator() {} /** * Constructor for fixed-width vectors. + * * @param valueWidth the record width (in bytes). */ protected VectorValueComparator(int valueWidth) { @@ -74,6 +64,7 @@ public int getValueWidth() { /** * Attach both vectors to compare to the same input vector. + * * @param vector the vector to attach. */ public void attachVector(V vector) { @@ -82,6 +73,7 @@ public void attachVector(V vector) { /** * Attach vectors to compare. + * * @param vector1 the first vector to compare. * @param vector2 the second vector to compare. */ @@ -99,7 +91,7 @@ private boolean mayHaveNulls(V v) { if (v.getValueCount() == 0) { return true; } - if (! v.getField().isNullable()) { + if (!v.getField().isNullable()) { return false; } return v.getNullCount() > 0; @@ -107,11 +99,11 @@ private boolean mayHaveNulls(V v) { /** * Compare two values, given their indices. + * * @param index1 index of the first value to compare. * @param index2 index of the second value to compare. - * @return an integer greater than 0, if the first value is greater; - * an integer smaller than 0, if the first value is smaller; or 0, if both - * values are equal. + * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if + * the first value is smaller; or 0, if both values are equal. */ public int compare(int index1, int index2) { if (checkNullsOnCompare) { @@ -133,19 +125,19 @@ public int compare(int index1, int index2) { } /** - * Compare two values, given their indices. - * This is a fast path for comparing non-null values, so the caller - * must make sure that values at both indices are not null. + * Compare two values, given their indices. This is a fast path for comparing non-null values, so + * the caller must make sure that values at both indices are not null. + * * @param index1 index of the first value to compare. * @param index2 index of the second value to compare. - * @return an integer greater than 0, if the first value is greater; - * an integer smaller than 0, if the first value is smaller; or 0, if both - * values are equal. + * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if + * the first value is smaller; or 0, if both values are equal. */ public abstract int compareNotNull(int index1, int index2); /** * Creates a comparator of the same type. + * * @return the newly created comparator. 
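Putting that contract together: a subclass only supplies compareNotNull and createNew, while the base class performs the null checks whenever checkNullsOnCompare is true. A hypothetical descending comparator for IntVector, written as a sketch (not part of this patch):

import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.vector.IntVector;

public class DescendingIntComparator extends VectorValueComparator<IntVector> {

  public DescendingIntComparator() {
    super(Integer.BYTES); // fixed-width: 4 bytes per value
  }

  @Override
  public int compareNotNull(int index1, int index2) {
    // Caller guarantees neither value is null; reverse order so larger values sort first.
    return Integer.compare(vector2.get(index2), vector1.get(index1));
  }

  @Override
  public VectorValueComparator<IntVector> createNew() {
    return new DescendingIntComparator();
  }
}

The createNew hook gives sorting algorithms a way to obtain a fresh comparator instance that can attach to other vectors without sharing state with this one.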
*/ public abstract VectorValueComparator createNew(); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java index ac083b84f1611..537189013a731 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link DeduplicationUtils}. - */ +/** Test cases for {@link DeduplicationUtils}. */ public class TestDeduplicationUtils { private static final int VECTOR_LENGTH = 100; @@ -57,10 +53,11 @@ public void shutdown() { @Test public void testDeduplicateFixedWidth() { try (IntVector origVec = new IntVector("original vec", allocator); - IntVector dedupVec = new IntVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - ArrowBuf distinctBuf = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { + IntVector dedupVec = new IntVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + ArrowBuf distinctBuf = + allocator.buffer( + DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -73,9 +70,10 @@ public void testDeduplicateFixedWidth() { } DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf); - assertEquals( VECTOR_LENGTH, - VECTOR_LENGTH * REPETITION_COUNT - - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); + assertEquals( + VECTOR_LENGTH, + VECTOR_LENGTH * REPETITION_COUNT + - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec); assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); @@ -84,7 +82,8 @@ public void testDeduplicateFixedWidth() { assertEquals(i, dedupVec.get(i)); } - DeduplicationUtils.populateRunLengths(distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); + DeduplicationUtils.populateRunLengths( + distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { @@ -96,12 +95,12 @@ public void testDeduplicateFixedWidth() { @Test public void testDeduplicateVariableWidth() { try (VarCharVector origVec = new VarCharVector("original vec", allocator); - VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - ArrowBuf distinctBuf = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { - origVec.allocateNew( - VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); + VarCharVector 
dedupVec = new VarCharVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + ArrowBuf distinctBuf = + allocator.buffer( + DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { + origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -114,9 +113,10 @@ public void testDeduplicateVariableWidth() { } DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf); - assertEquals(VECTOR_LENGTH, - VECTOR_LENGTH * REPETITION_COUNT - - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); + assertEquals( + VECTOR_LENGTH, + VECTOR_LENGTH * REPETITION_COUNT + - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec); assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); @@ -126,7 +126,7 @@ public void testDeduplicateVariableWidth() { } DeduplicationUtils.populateRunLengths( - distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); + distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java index 788213b162870..820cadccae537 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -30,9 +28,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link VectorRunDeduplicator}. - */ +/** Test cases for {@link VectorRunDeduplicator}. 
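Before the deduplicator tests below, it helps to see the three utilities exercised above end to end. A minimal sketch for the runs 1 1 1 2 3 3, assuming the DeduplicationUtils methods are publicly callable as the tests' direct calls suggest:

import org.apache.arrow.algorithm.deduplicate.DeduplicationUtils;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.util.DataSizeRoundingUtil;

public class DeduplicationSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator();
        IntVector orig = new IntVector("original", allocator);
        IntVector dedup = new IntVector("dedup", allocator);
        IntVector lengths = new IntVector("lengths", allocator);
        // One indicator bit per element, so the buffer needs ceil(n / 8) bytes.
        ArrowBuf distinct = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(6))) {
      orig.allocateNew(6);
      int[] values = {1, 1, 1, 2, 3, 3};
      for (int i = 0; i < values.length; i++) {
        orig.set(i, values[i]);
      }
      orig.setValueCount(values.length);
      dedup.allocateNew();
      lengths.allocateNew();

      // A set bit marks the first element of each run.
      DeduplicationUtils.populateRunStartIndicators(orig, distinct);
      DeduplicationUtils.populateDeduplicatedValues(distinct, orig, dedup); // 1, 2, 3
      DeduplicationUtils.populateRunLengths(distinct, lengths, values.length); // 3, 1, 2
    }
  }
}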
*/ public class TestVectorRunDeduplicator { private static final int VECTOR_LENGTH = 100; @@ -57,7 +53,7 @@ public void testDeduplicateFixedWidth() { IntVector dedupVec = new IntVector("deduplicated vec", allocator); IntVector lengthVec = new IntVector("length vec", allocator); VectorRunDeduplicator deduplicator = - new VectorRunDeduplicator<>(origVec, allocator)) { + new VectorRunDeduplicator<>(origVec, allocator)) { origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -93,12 +89,11 @@ public void testDeduplicateFixedWidth() { @Test public void testDeduplicateVariableWidth() { try (VarCharVector origVec = new VarCharVector("original vec", allocator); - VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - VectorRunDeduplicator deduplicator = - new VectorRunDeduplicator<>(origVec, allocator)) { - origVec.allocateNew( - VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); + VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + VectorRunDeduplicator deduplicator = + new VectorRunDeduplicator<>(origVec, allocator)) { + origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java index 45c47626b720e..bfda86f26883d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -23,7 +22,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link HashTableBasedDictionaryBuilder}. - */ +/** Test cases for {@link HashTableBasedDictionaryBuilder}. 
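The deduplicator class tested above wraps those utilities behind an AutoCloseable facade. A minimal sketch; the method names are assumed from the VectorRunDeduplicator API these tests exercise and should be treated as illustrative:

import org.apache.arrow.algorithm.deduplicate.VectorRunDeduplicator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;

public class RunDeduplicatorSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator();
        IntVector orig = new IntVector("original", allocator);
        IntVector dedup = new IntVector("dedup", allocator);
        IntVector lengths = new IntVector("lengths", allocator);
        VectorRunDeduplicator<IntVector> deduplicator =
            new VectorRunDeduplicator<>(orig, allocator)) {
      orig.allocateNew(5);
      int[] values = {8, 8, 9, 9, 9};
      for (int i = 0; i < values.length; i++) {
        orig.set(i, values[i]);
      }
      orig.setValueCount(values.length);
      dedup.allocateNew();
      lengths.allocateNew();

      System.out.println(deduplicator.getRunCount()); // assumed accessor: 2 runs
      deduplicator.populateDeduplicatedValues(dedup); // 8, 9
      deduplicator.populateRunLengths(lengths); // 2, 3
    }
  }
}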
*/ public class TestHashTableBasedDictionaryBuilder { private BufferAllocator allocator; @@ -52,7 +48,7 @@ public void shutdown() { @Test public void testBuildVariableWidthDictionaryWithNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -72,27 +68,34 @@ public void testBuildVariableWidthDictionaryWithNull() { vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, true); + new HashTableBasedDictionaryBuilder<>(dictionary, true); int result = dictionaryBuilder.addValues(vec); assertEquals(7, result); assertEquals(7, dictionary.getValueCount()); - assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); assertNull(dictionary.get(2)); - assertEquals("world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8)); } } @Test public void testBuildVariableWidthDictionaryWithoutNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -112,27 +115,33 @@ public void testBuildVariableWidthDictionaryWithoutNull() { vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, false); + new HashTableBasedDictionaryBuilder<>(dictionary, false); int result = dictionaryBuilder.addValues(vec); assertEquals(6, result); assertEquals(6, dictionary.getValueCount()); - assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); - + assertEquals( + "hello", new 
String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); } } @Test public void testBuildFixedWidthDictionaryWithNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -151,7 +160,7 @@ public void testBuildFixedWidthDictionaryWithNull() { vec.setNull(9); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, true); + new HashTableBasedDictionaryBuilder<>(dictionary, true); int result = dictionaryBuilder.addValues(vec); @@ -169,7 +178,7 @@ public void testBuildFixedWidthDictionaryWithNull() { @Test public void testBuildFixedWidthDictionaryWithoutNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -188,7 +197,7 @@ public void testBuildFixedWidthDictionaryWithoutNull() { vec.setNull(9); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, false); + new HashTableBasedDictionaryBuilder<>(dictionary, false); int result = dictionaryBuilder.addValues(vec); @@ -199,7 +208,6 @@ public void testBuildFixedWidthDictionaryWithoutNull() { assertEquals(8, dictionary.get(1)); assertEquals(32, dictionary.get(2)); assertEquals(16, dictionary.get(3)); - } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java index 60efbf58bebda..b9646284a015b 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -38,9 +36,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link HashTableDictionaryEncoder}. - */ +/** Test cases for {@link HashTableDictionaryEncoder}. 
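For reference alongside the tests above, the builder in its simplest form: distinct values (and, with encodeNull set, the null slot) are appended to the dictionary in first-appearance order. A minimal sketch using the same calls the tests make:

import java.nio.charset.StandardCharsets;
import org.apache.arrow.algorithm.dictionary.HashTableBasedDictionaryBuilder;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VarCharVector;

public class DictionaryBuilderSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator();
        VarCharVector vec = new VarCharVector("input", allocator);
        VarCharVector dictionary = new VarCharVector("dict", allocator)) {
      vec.allocateNew(64, 4);
      vec.set(0, "a".getBytes(StandardCharsets.UTF_8));
      vec.set(1, "b".getBytes(StandardCharsets.UTF_8));
      vec.set(2, "a".getBytes(StandardCharsets.UTF_8));
      vec.setNull(3);
      vec.setValueCount(4);

      HashTableBasedDictionaryBuilder<VarCharVector> builder =
          new HashTableBasedDictionaryBuilder<>(dictionary, /*encodeNull=*/ true);

      // Returns the number of dictionary entries: "a", "b", and null.
      System.out.println(builder.addValues(vec)); // 3
      System.out.println(dictionary.getValueCount()); // 3
    }
  }
}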
*/ public class TestHashTableDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -53,7 +49,7 @@ public class TestHashTableDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -69,8 +65,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -89,7 +85,7 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, false); + new HashTableDictionaryEncoder<>(dictionary, false); // perform encoding encodedVector.allocateNew(); @@ -98,17 +94,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -119,8 +119,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -144,7 +144,7 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, true); + new HashTableDictionaryEncoder<>(dictionary, true); // perform encoding encodedVector.allocateNew(); @@ -156,20 +156,24 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new 
Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -180,8 +184,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -199,13 +203,15 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, true); + new HashTableDictionaryEncoder<>(dictionary, true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. - assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -213,8 +219,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -235,7 +241,7 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); // verify indices @@ -262,8 +268,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -281,7 +287,7 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -305,8 +311,8 @@ public void 
testEncodeLargeVector() { public void testEncodeBinaryVector() { // Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -327,7 +333,7 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java index a76aedffa308d..a4641704198cb 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -39,9 +37,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link LinearDictionaryEncoder}. - */ +/** Test cases for {@link LinearDictionaryEncoder}. 
*/ public class TestLinearDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -54,7 +50,7 @@ public class TestLinearDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -70,8 +66,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -90,7 +86,7 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, false); + new LinearDictionaryEncoder<>(dictionary, false); // perform encoding encodedVector.allocateNew(); @@ -99,17 +95,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -120,8 +120,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -145,7 +145,7 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, true); + new LinearDictionaryEncoder<>(dictionary, true); // perform encoding encodedVector.allocateNew(); @@ -157,13 +157,16 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new Dictionary(dictionary, 
new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); @@ -171,7 +174,8 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -182,8 +186,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -201,13 +205,15 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, true); + new LinearDictionaryEncoder<>(dictionary, true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. - assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -215,8 +221,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -237,7 +243,7 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); // verify indices @@ -263,8 +269,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -282,7 +288,7 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -306,8 +312,8 @@ public void testEncodeLargeVector() { public void 
testEncodeBinaryVector() { // Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -328,7 +334,7 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java index e01c2e7905b46..e783e1f76818c 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -40,9 +38,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link SearchDictionaryEncoder}. - */ +/** Test cases for {@link SearchDictionaryEncoder}. 
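In contrast to the hash-table variant, the linear encoder exercised above looks each value up by scanning the dictionary front to back, as its name suggests, so it needs no hashing and no sorted dictionary; that makes it practical only for small dictionaries. The calling pattern is otherwise identical. A minimal sketch, with the null slot convention the tests rely on (type-parameter order assumed as before):

import java.nio.charset.StandardCharsets;
import org.apache.arrow.algorithm.dictionary.LinearDictionaryEncoder;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VarCharVector;

public class LinearEncodeSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator();
        VarCharVector raw = new VarCharVector("raw", allocator);
        IntVector encoded = new IntVector("encoded", allocator);
        VarCharVector dictionary = new VarCharVector("dict", allocator)) {
      // Slot 0 holds null so that nulls in the input can be encoded too.
      dictionary.allocateNew();
      dictionary.setNull(0);
      dictionary.set(1, "x".getBytes(StandardCharsets.UTF_8));
      dictionary.setValueCount(2);

      raw.allocateNew();
      raw.setNull(0);
      raw.set(1, "x".getBytes(StandardCharsets.UTF_8));
      raw.setValueCount(2);

      LinearDictionaryEncoder<IntVector, VarCharVector> encoder =
          new LinearDictionaryEncoder<>(dictionary, /*encodeNull=*/ true);
      encoded.allocateNew();
      encoder.encode(raw, encoded); // encoded holds 0, 1
    }
  }
}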
*/ public class TestSearchDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -55,7 +51,7 @@ public class TestSearchDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -71,8 +67,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -91,8 +87,8 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false); // perform encoding encodedVector.allocateNew(); @@ -101,17 +97,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -122,8 +122,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -147,8 +147,8 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); // perform encoding encodedVector.allocateNew(); @@ -160,13 +160,16 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), 
String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); @@ -174,7 +177,8 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -185,8 +189,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -204,14 +208,16 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. 
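For contrast with the null-handling case above, the normal path of the search-based encoder: it locates each value by searching a sorted dictionary, which is why its constructor takes a comparator and why the dictionary must already be sorted in that comparator's order. A minimal sketch (type-parameter order assumed as before):

import java.nio.charset.StandardCharsets;
import org.apache.arrow.algorithm.dictionary.SearchDictionaryEncoder;
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VarCharVector;

public class SearchEncodeSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator();
        VarCharVector raw = new VarCharVector("raw", allocator);
        IntVector encoded = new IntVector("encoded", allocator);
        VarCharVector dictionary = new VarCharVector("dict", allocator)) {
      // The dictionary must be sorted for the search: "a" < "b" < "c".
      dictionary.allocateNew();
      dictionary.set(0, "a".getBytes(StandardCharsets.UTF_8));
      dictionary.set(1, "b".getBytes(StandardCharsets.UTF_8));
      dictionary.set(2, "c".getBytes(StandardCharsets.UTF_8));
      dictionary.setValueCount(3);

      raw.allocateNew();
      raw.set(0, "c".getBytes(StandardCharsets.UTF_8));
      raw.set(1, "a".getBytes(StandardCharsets.UTF_8));
      raw.setValueCount(2);

      SearchDictionaryEncoder<IntVector, VarCharVector> encoder =
          new SearchDictionaryEncoder<>(
              dictionary, DefaultVectorComparators.createDefaultComparator(raw), false);
      encoded.allocateNew();
      encoder.encode(raw, encoded); // encoded holds 2, 0
    }
  }
}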
- assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -219,8 +225,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -241,8 +247,8 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); // verify indices @@ -268,8 +274,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -287,8 +293,8 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -312,8 +318,8 @@ public void testEncodeLargeVector() { public void testEncodeBinaryVector() { // Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -334,8 +340,8 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java index 340b7e67e861f..6c8a57c1a4648 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java +++ 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link SearchTreeBasedDictionaryBuilder}. - */ +/** Test cases for {@link SearchTreeBasedDictionaryBuilder}. */ public class TestSearchTreeBasedDictionaryBuilder { private BufferAllocator allocator; @@ -53,8 +49,8 @@ public void shutdown() { @Test public void testBuildVariableWidthDictionaryWithNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator); - VarCharVector sortedDictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator); + VarCharVector sortedDictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -74,9 +70,10 @@ public void testBuildVariableWidthDictionaryWithNull() { vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); int result = dictionaryBuilder.addValues(vec); @@ -86,20 +83,32 @@ public void testBuildVariableWidthDictionaryWithNull() { dictionaryBuilder.populateSortedDictionary(sortedDictionary); assertTrue(sortedDictionary.isNull(0)); - assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "12", + new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "abc", + new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "good", + new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "hello", + new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "world", + new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8)); } } @Test public void 
testBuildVariableWidthDictionaryWithoutNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator); - VarCharVector sortedDictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator); + VarCharVector sortedDictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -119,9 +128,10 @@ public void testBuildVariableWidthDictionaryWithoutNull() { vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); int result = dictionaryBuilder.addValues(vec); @@ -130,20 +140,32 @@ public void testBuildVariableWidthDictionaryWithoutNull() { dictionaryBuilder.populateSortedDictionary(sortedDictionary); - assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "12", + new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", + new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "good", + new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "hello", + new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "world", + new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); } } @Test public void testBuildFixedWidthDictionaryWithNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator); - IntVector sortedDictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator); + IntVector sortedDictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -162,9 +184,10 @@ public void testBuildFixedWidthDictionaryWithNull() { vec.set(8, 4); vec.setNull(9); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); int result = dictionaryBuilder.addValues(vec); @@ -184,8 +207,8 @@ public void testBuildFixedWidthDictionaryWithNull() { @Test public 
void testBuildFixedWidthDictionaryWithoutNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator); - IntVector sortedDictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator); + IntVector sortedDictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -204,9 +227,10 @@ public void testBuildFixedWidthDictionaryWithoutNull() { vec.set(8, 4); vec.setNull(9); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); int result = dictionaryBuilder.addValues(vec); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java index 630dd80b44084..e3ab981670e9e 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.misc; import static org.junit.Assert.assertEquals; @@ -26,9 +25,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link PartialSumUtils}. - */ +/** Test cases for {@link PartialSumUtils}. */ public class TestPartialSumUtils { private static final int PARTIAL_SUM_VECTOR_LENGTH = 101; @@ -50,7 +47,7 @@ public void shutdown() { @Test public void testToPartialSumVector() { try (IntVector delta = new IntVector("delta", allocator); - IntVector partialSum = new IntVector("partial sum", allocator)) { + IntVector partialSum = new IntVector("partial sum", allocator)) { delta.allocateNew(DELTA_VECTOR_LENGTH); delta.setValueCount(DELTA_VECTOR_LENGTH); @@ -75,7 +72,7 @@ public void testToPartialSumVector() { @Test public void testToDeltaVector() { try (IntVector partialSum = new IntVector("partial sum", allocator); - IntVector delta = new IntVector("delta", allocator)) { + IntVector delta = new IntVector("delta", allocator)) { partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH); partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH); @@ -111,7 +108,8 @@ public void testFindPositionInPartialSumVector() { // search and verify results for (int i = 0; i < PARTIAL_SUM_VECTOR_LENGTH - 1; i++) { - assertEquals(i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1)); + assertEquals( + i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1)); } } } @@ -131,8 +129,10 @@ public void testFindPositionInPartialSumVectorNegative() { // search and verify results assertEquals(0, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase)); assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase - 1)); - assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, - sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1))); + assertEquals( + -1, + PartialSumUtils.findPositionInPartialSumVector( + partialSum, sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1))); } } } diff --git 
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java index 0e6627eb4822a..4b7c6a9756780 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.rank; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import java.nio.charset.StandardCharsets; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. - */ +/** Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. */ public class TestVectorRank { private BufferAllocator allocator; @@ -70,7 +66,7 @@ public void testFixedWidthRank() { vector.set(9, 6); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); assertEquals(7, rank.indexAtRank(vector, comparator, 0)); assertEquals(0, rank.indexAtRank(vector, comparator, 1)); assertEquals(6, rank.indexAtRank(vector, comparator, 2)); @@ -103,7 +99,7 @@ public void testVariableWidthRank() { vector.set(9, String.valueOf(6).getBytes(StandardCharsets.UTF_8)); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); assertEquals(7, rank.indexAtRank(vector, comparator, 0)); assertEquals(0, rank.indexAtRank(vector, comparator, 1)); @@ -137,11 +133,13 @@ public void testRankNegative() { vector.set(9, 6); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); - assertThrows(IllegalArgumentException.class, () -> { - rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1); + }); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java index 9ccecfa84a73a..7ff86a743effd 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.search; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -26,7 +25,6 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -39,9 +37,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for {@link ParallelSearcher}. - */ +/** Test cases for {@link ParallelSearcher}. */ @RunWith(Parameterized.class) public class TestParallelSearcher { @@ -97,8 +93,10 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept keyVector.allocateNew(VECTOR_LENGTH); // if we are comparing elements using equality semantics, we do not need a comparator here. - VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null - : DefaultVectorComparators.createDefaultComparator(targetVector); + VectorValueComparator comparator = + comparatorType == ComparatorType.EqualityComparator + ? null + : DefaultVectorComparators.createDefaultComparator(targetVector); for (int i = 0; i < VECTOR_LENGTH; i++) { targetVector.set(i, i); @@ -107,9 +105,13 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept targetVector.setValueCount(VECTOR_LENGTH); keyVector.setValueCount(VECTOR_LENGTH); - ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount); + ParallelSearcher searcher = + new ParallelSearcher<>(targetVector, threadPool, threadCount); for (int i = 0; i < VECTOR_LENGTH; i++) { - int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator); + int pos = + comparator == null + ? searcher.search(keyVector, i) + : searcher.search(keyVector, i, comparator); if (i * 2 < VECTOR_LENGTH) { assertEquals(i * 2, pos); } else { @@ -122,13 +124,15 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept @Test public void testParallelStringSearch() throws ExecutionException, InterruptedException { try (VarCharVector targetVector = new VarCharVector("targetVector", allocator); - VarCharVector keyVector = new VarCharVector("keyVector", allocator)) { + VarCharVector keyVector = new VarCharVector("keyVector", allocator)) { targetVector.allocateNew(VECTOR_LENGTH); keyVector.allocateNew(VECTOR_LENGTH); // if we are comparing elements using equality semantics, we do not need a comparator here. - VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null - : DefaultVectorComparators.createDefaultComparator(targetVector); + VectorValueComparator comparator = + comparatorType == ComparatorType.EqualityComparator + ? null + : DefaultVectorComparators.createDefaultComparator(targetVector); for (int i = 0; i < VECTOR_LENGTH; i++) { targetVector.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); @@ -137,9 +141,13 @@ public void testParallelStringSearch() throws ExecutionException, InterruptedExc targetVector.setValueCount(VECTOR_LENGTH); keyVector.setValueCount(VECTOR_LENGTH); - ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount); + ParallelSearcher searcher = + new ParallelSearcher<>(targetVector, threadPool, threadCount); for (int i = 0; i < VECTOR_LENGTH; i++) { - int pos = comparator == null ? 
searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator); + int pos = + comparator == null + ? searcher.search(keyVector, i) + : searcher.search(keyVector, i, comparator); if (i * 2 < VECTOR_LENGTH) { assertEquals(i * 2, pos); } else { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java index 18f4fa0355f4f..39f2f609f7df4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import static org.junit.Assert.assertEquals; import java.util.Arrays; import java.util.Collection; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -33,9 +31,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for {@link VectorRangeSearcher}. - */ +/** Test cases for {@link VectorRangeSearcher}. */ @RunWith(Parameterized.class) public class TestVectorRangeSearcher { @@ -78,9 +74,11 @@ public void testGetLowerBounds() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { - int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, intVector, i * repeat); + int result = + VectorRangeSearcher.getFirstMatch(intVector, comparator, intVector, i * repeat); assertEquals(i * ((long) repeat), result); } } @@ -112,7 +110,8 @@ public void testGetLowerBoundsNegative() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, negVector, i); assertEquals(-1, result); @@ -141,7 +140,8 @@ public void testGetUpperBounds() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int result = VectorRangeSearcher.getLastMatch(intVector, comparator, intVector, i * repeat); assertEquals((i + 1) * repeat - 1, result); @@ -153,7 +153,7 @@ public void testGetUpperBounds() { public void testGetUpperBoundsNegative() { final int maxValue = 100; try (IntVector intVector = new IntVector("int vec", allocator); - IntVector negVector = new IntVector("neg vec", allocator)) { + IntVector negVector = new IntVector("neg vec", allocator)) { // allocate vector intVector.allocateNew(maxValue * repeat); intVector.setValueCount(maxValue * repeat); @@ -175,7 +175,8 @@ public void testGetUpperBoundsNegative() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int 
result = VectorRangeSearcher.getLastMatch(intVector, comparator, negVector, i); assertEquals(-1, result); @@ -185,11 +186,6 @@ public void testGetUpperBoundsNegative() { @Parameterized.Parameters(name = "repeat = {0}") public static Collection getRepeat() { - return Arrays.asList( - new Object[]{1}, - new Object[]{2}, - new Object[]{5}, - new Object[]{10} - ); + return Arrays.asList(new Object[] {1}, new Object[] {2}, new Object[] {5}, new Object[] {10}); } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java index 32fa10bbd98d0..629d900b479b6 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; import static org.junit.Assert.assertEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -37,9 +35,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}. - */ +/** Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}. */ public class TestVectorSearcher { private final int VECTOR_LENGTH = 100; @@ -59,7 +55,7 @@ public void shutdown() { @Test public void testBinarySearchInt() { try (IntVector rawVector = new IntVector("", allocator); - IntVector negVector = new IntVector("", allocator)) { + IntVector negVector = new IntVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(1); @@ -77,7 +73,7 @@ public void testBinarySearchInt() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -91,7 +87,7 @@ public void testBinarySearchInt() { @Test public void testLinearSearchInt() { try (IntVector rawVector = new IntVector("", allocator); - IntVector negVector = new IntVector("", allocator)) { + IntVector negVector = new IntVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(1); @@ -109,7 +105,7 @@ public void testLinearSearchInt() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -123,7 +119,7 @@ public void testLinearSearchInt() { @Test public void testBinarySearchVarChar() { try (VarCharVector rawVector = new VarCharVector("", allocator); - VarCharVector negVector = new VarCharVector("", allocator)) { + VarCharVector negVector = new VarCharVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); 
rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(VECTOR_LENGTH, 1); @@ -148,7 +144,7 @@ public void testBinarySearchVarChar() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -162,7 +158,7 @@ public void testBinarySearchVarChar() { @Test public void testLinearSearchVarChar() { try (VarCharVector rawVector = new VarCharVector("", allocator); - VarCharVector negVector = new VarCharVector("", allocator)) { + VarCharVector negVector = new VarCharVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(VECTOR_LENGTH, 1); @@ -187,7 +183,7 @@ public void testLinearSearchVarChar() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -260,11 +256,11 @@ private ListVector createNegativeListVector() { @Test public void testBinarySearchList() { try (ListVector rawVector = createListVector(); - ListVector negVector = createNegativeListVector()) { + ListVector negVector = createNegativeListVector()) { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < rawVector.getValueCount(); i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -281,11 +277,11 @@ public void testBinarySearchList() { @Test public void testLinearSearchList() { try (ListVector rawVector = createListVector(); - ListVector negVector = createNegativeListVector()) { + ListVector negVector = createNegativeListVector()) { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < rawVector.getValueCount(); i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java index 9624432924b5a..21f6c0217c376 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link CompositeVectorComparator}. 
- */ +/** Test cases for {@link CompositeVectorComparator}. */ public class TestCompositeVectorComparator { private BufferAllocator allocator; @@ -60,7 +56,7 @@ public void testCompareVectorSchemaRoot() { VarCharVector strVec2 = new VarCharVector("str2", allocator); try (VectorSchemaRoot batch1 = new VectorSchemaRoot(Arrays.asList(intVec1, strVec1)); - VectorSchemaRoot batch2 = new VectorSchemaRoot(Arrays.asList(intVec2, strVec2))) { + VectorSchemaRoot batch2 = new VectorSchemaRoot(Arrays.asList(intVec2, strVec2))) { intVec1.allocateNew(vectorLength); strVec1.allocateNew(vectorLength * 10, vectorLength); @@ -75,15 +71,15 @@ public void testCompareVectorSchemaRoot() { } VectorValueComparator innerComparator1 = - DefaultVectorComparators.createDefaultComparator(intVec1); + DefaultVectorComparators.createDefaultComparator(intVec1); innerComparator1.attachVectors(intVec1, intVec2); VectorValueComparator innerComparator2 = - DefaultVectorComparators.createDefaultComparator(strVec1); + DefaultVectorComparators.createDefaultComparator(strVec1); innerComparator2.attachVectors(strVec1, strVec2); - VectorValueComparator comparator = new CompositeVectorComparator( - new VectorValueComparator[]{innerComparator1, innerComparator2} - ); + VectorValueComparator comparator = + new CompositeVectorComparator( + new VectorValueComparator[] {innerComparator1, innerComparator2}); // verify results diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java index c40854fb17410..f1b3d6fb5aa1d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; @@ -67,9 +66,7 @@ import org.junit.Test; import org.junit.jupiter.api.Assertions; -/** - * Test cases for {@link DefaultVectorComparators}. - */ +/** Test cases for {@link DefaultVectorComparators}. 
*/ public class TestDefaultVectorComparator { private BufferAllocator allocator; @@ -111,9 +108,9 @@ private ListVector createListVector(int count) { @Test public void testCompareLists() { try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // prefix is smaller @@ -121,11 +118,11 @@ public void testCompareLists() { } try (ListVector listVector1 = createListVector(11); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { ((IntVector) listVector2.getDataVector()).set(10, 110); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // breaking tie by the last element @@ -133,10 +130,10 @@ public void testCompareLists() { } try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(10)) { + ListVector listVector2 = createListVector(10)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // list vector elements equal @@ -149,9 +146,9 @@ public void testCopiedComparatorForLists() { for (int i = 1; i < 10; i++) { for (int j = 1; j < 10; j++) { try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); VectorValueComparator copyComparator = comparator.createNew(); @@ -185,7 +182,7 @@ private FixedSizeListVector createFixedSizeListVector(int count) { @Test public void testCompareFixedSizeLists() { try (FixedSizeListVector listVector1 = createFixedSizeListVector(10); - FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); @@ -195,7 +192,7 @@ public void testCompareFixedSizeLists() { } try (FixedSizeListVector listVector1 = createFixedSizeListVector(11); - FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { ((IntVector) listVector2.getDataVector()).set(10, 110); VectorValueComparator comparator = @@ -207,7 +204,7 @@ public void testCompareFixedSizeLists() { } try (FixedSizeListVector listVector1 = createFixedSizeListVector(10); - FixedSizeListVector listVector2 = createFixedSizeListVector(10)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(10)) { VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(listVector1); @@ -236,7 +233,7 @@ public void testCompareUInt1() { vec.set(9, Byte.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + 
DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -259,14 +256,21 @@ public void testCompareUInt2() { vec.allocateNew(10); ValueVectorDataPopulator.setVector( - vec, null, (char) (Character.MAX_VALUE - 1), Character.MAX_VALUE, (char) 0, (char) 1, - (char) 2, (char) (Character.MAX_VALUE - 1), null, + vec, + null, + (char) (Character.MAX_VALUE - 1), + Character.MAX_VALUE, + (char) 0, + (char) 1, + (char) 2, + (char) (Character.MAX_VALUE - 1), + null, '\u7FFF', // value for the max 16-bit signed integer '\u8000' // value for the min 16-bit signed integer - ); + ); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -301,7 +305,7 @@ public void testCompareUInt4() { vec.set(9, Integer.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -336,7 +340,7 @@ public void testCompareUInt8() { vec.set(9, Long.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -358,7 +362,16 @@ public void testCompareFloat4() { try (Float4Vector vec = new Float4Vector("", allocator)) { vec.allocateNew(9); ValueVectorDataPopulator.setVector( - vec, -1.1f, 0.0f, 1.0f, null, 1.0f, 2.0f, Float.NaN, Float.NaN, Float.POSITIVE_INFINITY, + vec, + -1.1f, + 0.0f, + 1.0f, + null, + 1.0f, + 2.0f, + Float.NaN, + Float.NaN, + Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY); VectorValueComparator comparator = @@ -393,7 +406,16 @@ public void testCompareFloat8() { try (Float8Vector vec = new Float8Vector("", allocator)) { vec.allocateNew(9); ValueVectorDataPopulator.setVector( - vec, -1.1, 0.0, 1.0, null, 1.0, 2.0, Double.NaN, Double.NaN, Double.POSITIVE_INFINITY, + vec, + -1.1, + 0.0, + 1.0, + null, + 1.0, + 2.0, + Double.NaN, + Double.NaN, + Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY); VectorValueComparator comparator = @@ -488,8 +510,15 @@ public void testCompareShort() { try (SmallIntVector vec = new SmallIntVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( - vec, (short) -1, (short) 0, (short) 1, null, (short) 1, (short) 5, - (short) (Short.MIN_VALUE + 1), Short.MAX_VALUE); + vec, + (short) -1, + (short) 0, + (short) 1, + null, + (short) 1, + (short) 5, + (short) (Short.MIN_VALUE + 1), + Short.MAX_VALUE); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -519,8 +548,15 @@ public void testCompareByte() { try (TinyIntVector vec = new TinyIntVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( - vec, (byte) -1, (byte) 0, (byte) 1, null, (byte) 1, (byte) 5, - (byte) (Byte.MIN_VALUE + 1), Byte.MAX_VALUE); + vec, + (byte) -1, + (byte) 0, + (byte) 1, + null, + (byte) 1, + (byte) 5, + (byte) (Byte.MIN_VALUE + 1), + Byte.MAX_VALUE); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -549,8 +585,7 @@ public void testCompareByte() { public void testCompareBit() { try (BitVector vec = new BitVector("", allocator)) { vec.allocateNew(6); - ValueVectorDataPopulator.setVector( - vec,
1, 2, 0, 0, -1, null); + ValueVectorDataPopulator.setVector(vec, 1, 2, 0, 0, -1, null); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -691,7 +726,8 @@ public void testCompareDecimal256() { @Test public void testCompareDuration() { try (DurationVector vec = - new DurationVector("", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { + new DurationVector( + "", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -722,7 +758,8 @@ public void testCompareDuration() { @Test public void testCompareIntervalDay() { try (IntervalDayVector vec = - new IntervalDayVector("", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { + new IntervalDayVector( + "", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { vec.allocateNew(8); vec.set(0, -1, 0); vec.set(1, 0, 0); @@ -755,8 +792,7 @@ public void testCompareIntervalDay() { @Test public void testCompareTimeMicro() { - try (TimeMicroVector vec = - new TimeMicroVector("", allocator)) { + try (TimeMicroVector vec = new TimeMicroVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -816,8 +852,7 @@ public void testCompareTimeMilli() { @Test public void testCompareTimeNano() { - try (TimeNanoVector vec = - new TimeNanoVector("", allocator)) { + try (TimeNanoVector vec = new TimeNanoVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -877,8 +912,7 @@ public void testCompareTimeSec() { @Test public void testCompareTimeStamp() { - try (TimeStampMilliVector vec = - new TimeStampMilliVector("", allocator)) { + try (TimeStampMilliVector vec = new TimeStampMilliVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -909,7 +943,7 @@ public void testCompareTimeStamp() { @Test public void testCompareFixedSizeBinary() { try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 2); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1}); @@ -923,7 +957,7 @@ public void testCompareFixedSizeBinary() { } try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1, 0}); @@ -937,7 +971,7 @@ public void testCompareFixedSizeBinary() { } try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1, 1}); @@ -953,8 +987,8 @@ public void testCompareFixedSizeBinary() { @Test public void testCompareNull() { - try (NullVector vec = new 
NullVector("test", - FieldType.notNullable(new ArrowType.Int(32, false)))) { + try (NullVector vec = + new NullVector("test", FieldType.notNullable(new ArrowType.Int(32, false)))) { vec.setValueCount(2); VectorValueComparator comparator = @@ -967,12 +1001,14 @@ public void testCompareNull() { @Test public void testCheckNullsOnCompareIsFalseForNonNullableVector() { - try (IntVector vec = new IntVector("not nullable", - FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { + try (IntVector vec = + new IntVector( + "not nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertFalse(comparator.checkNullsOnCompare()); @@ -981,16 +1017,17 @@ public void testCheckNullsOnCompareIsFalseForNonNullableVector() { @Test public void testCheckNullsOnCompareIsTrueForNullableVector() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("not-nullable", FieldType.notNullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "not-nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { ValueVectorDataPopulator.setVector(vec, 1, null, 3, 4); ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.checkNullsOnCompare()); @@ -1001,17 +1038,18 @@ public void testCheckNullsOnCompareIsTrueForNullableVector() { @Test public void testCheckNullsOnCompareIsFalseWithNoNulls() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("also-nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "also-nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator)) { // no null values ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertFalse(comparator.checkNullsOnCompare()); @@ -1022,13 +1060,14 @@ public void testCheckNullsOnCompareIsFalseWithNoNulls() { @Test public void testCheckNullsOnCompareIsTrueWithEmptyVectors() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("also-nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "also-nullable", FieldType.nullable(new 
ArrowType.Int(32, false)), allocator)) { - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec2); assertTrue(comparator.checkNullsOnCompare()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java index 91ef52017df4d..ed5aadfcda04c 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.util.stream.IntStream; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link FixedWidthInPlaceVectorSorter}. - */ +/** Test cases for {@link FixedWidthInPlaceVectorSorter}. */ public class TestFixedWidthInPlaceVectorSorter { private BufferAllocator allocator; @@ -69,7 +65,8 @@ public void testSortInt() { // sort the vector FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); sorter.sortInPlace(vec, comparator); @@ -90,8 +87,8 @@ public void testSortInt() { } /** - * Tests the worst case for quick sort. - * It may cause stack overflow if the algorithm is implemented as a recursive algorithm. + * Tests the worst case for quick sort. It may cause stack overflow if the algorithm is + * implemented as a recursive algorithm. */ @Test public void testSortLargeIncreasingInt() { @@ -107,7 +104,8 @@ public void testSortLargeIncreasingInt() { // sort the vector FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); sorter.sortInPlace(vec, comparator); @@ -133,7 +131,8 @@ public void testChoosePivot() { vec.setValueCount(vectorLength); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) { // setup internal data structures @@ -164,16 +163,15 @@ public void testChoosePivot() { } } - /** - * Evaluates choosing pivot for all possible permutations of 3 numbers. - */ + /** Evaluates choosing pivot for all possible permutations of 3 numbers. 
*/ @Test public void testChoosePivotAllPermutes() { try (IntVector vec = new IntVector("", allocator)) { vec.allocateNew(3); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) { // setup internal data structures @@ -216,25 +214,25 @@ public void testChoosePivotAllPermutes() { @Test public void testSortInt2() { try (IntVector vector = new IntVector("vector", allocator)) { - ValueVectorDataPopulator.setVector(vector, - 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, - 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, - 8, 9, 10, 11, 36, 37, 38, 39, 40, 41, - 66, 67, 68, 69, 70, 71); + ValueVectorDataPopulator.setVector( + vector, 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, 8, 9, 10, + 11, 36, 37, 38, 39, 40, 41, 66, 67, 68, 69, 70, 71); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); sorter.sortInPlace(vector, comparator); int[] actual = new int[vector.getValueCount()]; - IntStream.range(0, vector.getValueCount()).forEach( - i -> actual[i] = vector.get(i)); + IntStream.range(0, vector.getValueCount()).forEach(i -> actual[i] = vector.get(i)); assertArrayEquals( - new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}, actual); + new int[] { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71 + }, + actual); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java index cc13e7f8ceaee..4096897c20a05 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.util.stream.IntStream; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -37,9 +35,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link FixedWidthOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link FixedWidthOutOfPlaceVectorSorter}. */ public class TestFixedWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -49,7 +45,9 @@ public TestFixedWidthOutOfPlaceVectorSorter(boolean generalSorter) { } OutOfPlaceVectorSorter getSorter() { - return generalSorter ? new GeneralOutOfPlaceVectorSorter<>() : new FixedWidthOutOfPlaceVectorSorter<>(); + return generalSorter + ? 
new GeneralOutOfPlaceVectorSorter<>() + : new FixedWidthOutOfPlaceVectorSorter<>(); } @Before @@ -82,10 +80,11 @@ public void testSortByte() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); TinyIntVector sortedVec = - (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -129,10 +128,11 @@ public void testSortShort() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SmallIntVector sortedVec = - (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -176,9 +176,11 @@ public void testSortInt() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + IntVector sortedVec = + (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -222,9 +224,11 @@ public void testSortLong() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + BigIntVector sortedVec = + (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -268,9 +272,11 @@ public void testSortFloat() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + Float4Vector sortedVec = + (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -314,9 +320,11 @@ public void testSortDouble() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - Float8Vector sortedVec = (Float8Vector) 
vec.getField().getFieldType().createNewSingleVector("", allocator, null); + Float8Vector sortedVec = + (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -343,17 +351,17 @@ public void testSortDouble() { @Test public void testSortInt2() { try (IntVector vec = new IntVector("", allocator)) { - ValueVectorDataPopulator.setVector(vec, - 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, - 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, - 8, 9, 10, 11, 36, 37, 38, 39, 40, 41, - 66, 67, 68, 69, 70, 71); + ValueVectorDataPopulator.setVector( + vec, 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, 8, 9, 10, 11, + 36, 37, 38, 39, 40, 41, 66, 67, 68, 69, 70, 71); // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - try (IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (IntVector sortedVec = + (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -361,13 +369,14 @@ public void testSortInt2() { // verify results int[] actual = new int[sortedVec.getValueCount()]; - IntStream.range(0, sortedVec.getValueCount()).forEach( - i -> actual[i] = sortedVec.get(i)); + IntStream.range(0, sortedVec.getValueCount()).forEach(i -> actual[i] = sortedVec.get(i)); assertArrayEquals( - new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}, actual); + new int[] { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71 + }, + actual); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java index 80c72b4e21a27..a92cc77818f4a 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -37,9 +35,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test sorting fixed width vectors with random data. - */ +/** Test sorting fixed width vectors with random data. 
*/ @RunWith(Parameterized.class) public class TestFixedWidthSorting<V extends BaseFixedWidthVector, U extends Comparable<U>> { @@ -70,8 +66,12 @@ public void shutdown() { } public TestFixedWidthSorting( - int length, double nullFraction, boolean inPlace, String desc, - Function vectorGenerator, TestSortingUtil.DataGenerator dataGenerator) { + int length, + double nullFraction, + boolean inPlace, + String desc, + Function vectorGenerator, + TestSortingUtil.DataGenerator dataGenerator) { this.length = length; this.nullFraction = nullFraction; this.inPlace = inPlace; @@ -94,7 +94,8 @@ void sortInPlace() { TestSortingUtil.sortArray(array); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); sorter.sortInPlace(vector, comparator); @@ -109,9 +110,11 @@ void sortOutOfPlace() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); - try (V sortedVec = (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (V sortedVec = + (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vector.getValueCount()); sortedVec.setValueCount(vector.getValueCount()); @@ -123,47 +126,78 @@ void sortOutOfPlace() { } } - @Parameterized.Parameters(name = "length = {0}, null fraction = {1}, in place = {2}, vector = {3}") + @Parameterized.Parameters( + name = "length = {0}, null fraction = {1}, in place = {2}, vector = {3}") public static Collection getParameters() { List params = new ArrayList<>(); for (int length : VECTOR_LENGTHS) { for (double nullFrac : NULL_FRACTIONS) { for (boolean inPlace : new boolean[] {true, false}) { - params.add(new Object[] { - length, nullFrac, inPlace, "TinyIntVector", - (Function) allocator -> new TinyIntVector("vector", allocator), - TestSortingUtil.TINY_INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "SmallIntVector", - (Function) allocator -> new SmallIntVector("vector", allocator), - TestSortingUtil.SMALL_INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "IntVector", - (Function) allocator -> new IntVector("vector", allocator), - TestSortingUtil.INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "BigIntVector", - (Function) allocator -> new BigIntVector("vector", allocator), - TestSortingUtil.LONG_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "Float4Vector", - (Function) allocator -> new Float4Vector("vector", allocator), - TestSortingUtil.FLOAT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "Float8Vector", - (Function) allocator -> new Float8Vector("vector", allocator), - TestSortingUtil.DOUBLE_GENERATOR - }); + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "TinyIntVector", + (Function) + allocator -> new TinyIntVector("vector", allocator), + TestSortingUtil.TINY_INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "SmallIntVector", + (Function) + allocator -> new SmallIntVector("vector", allocator), + TestSortingUtil.SMALL_INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, +
"IntVector", + (Function) + allocator -> new IntVector("vector", allocator), + TestSortingUtil.INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "BigIntVector", + (Function) + allocator -> new BigIntVector("vector", allocator), + TestSortingUtil.LONG_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "Float4Vector", + (Function) + allocator -> new Float4Vector("vector", allocator), + TestSortingUtil.FLOAT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "Float8Vector", + (Function) + allocator -> new Float8Vector("vector", allocator), + TestSortingUtil.DOUBLE_GENERATOR + }); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java index 07a6b545ddaa2..9e796a98ab790 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -30,9 +29,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link GeneralOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link GeneralOutOfPlaceVectorSorter}. */ public class TestGeneralOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -49,30 +46,33 @@ public void shutdown() { VectorValueComparator getComparator(StructVector structVector) { IntVector child0 = structVector.getChild("column0", IntVector.class); - VectorValueComparator childComp0 = DefaultVectorComparators.createDefaultComparator(child0); + VectorValueComparator childComp0 = + DefaultVectorComparators.createDefaultComparator(child0); childComp0.attachVector(child0); IntVector child1 = structVector.getChild("column1", IntVector.class); - VectorValueComparator childComp1 = DefaultVectorComparators.createDefaultComparator(child1); + VectorValueComparator childComp1 = + DefaultVectorComparators.createDefaultComparator(child1); childComp1.attachVector(child1); - VectorValueComparator comp = new VectorValueComparator() { - - @Override - public int compareNotNull(int index1, int index2) { - // compare values by lexicographic order - int result0 = childComp0.compare(index1, index2); - if (result0 != 0) { - return result0; - } - return childComp1.compare(index1, index2); - } - - @Override - public VectorValueComparator createNew() { - return this; - } - }; + VectorValueComparator comp = + new VectorValueComparator() { + + @Override + public int compareNotNull(int index1, int index2) { + // compare values by lexicographic order + int result0 = childComp0.compare(index1, index2); + if (result0 != 0) { + return result0; + } + return childComp1.compare(index1, index2); + } + + @Override + public VectorValueComparator createNew() { + return this; + } + }; return comp; } @@ -81,17 +81,21 @@ public VectorValueComparator createNew() { public void testSortStructVector() { final int vectorLength = 7; try (StructVector srcVector = StructVector.empty("src struct", allocator); - StructVector dstVector = StructVector.empty("dst struct", allocator)) { + StructVector dstVector = StructVector.empty("dst struct", allocator)) { IntVector srcChild0 = - 
srcVector.addOrGet("column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + srcVector.addOrGet( + "column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector srcChild1 = - srcVector.addOrGet("column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + srcVector.addOrGet( + "column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector dstChild0 = - dstVector.addOrGet("column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + dstVector.addOrGet( + "column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector dstChild1 = - dstVector.addOrGet("column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + dstVector.addOrGet( + "column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); // src struct vector values: // [ @@ -128,15 +132,16 @@ public void testSortStructVector() { // validate results assertEquals(vectorLength, dstVector.getValueCount()); assertEquals( - "[" + - "null, " + - "{\"column1\":3}, " + - "{\"column0\":2,\"column1\":1}, " + - "{\"column0\":3,\"column1\":4}, " + - "{\"column0\":5,\"column1\":4}, " + - "{\"column0\":6,\"column1\":6}, " + - "{\"column0\":7}" + - "]", dstVector.toString()); + "[" + + "null, " + + "{\"column1\":3}, " + + "{\"column0\":2,\"column1\":1}, " + + "{\"column0\":3,\"column1\":4}, " + + "{\"column0\":5,\"column1\":4}, " + + "{\"column0\":6,\"column1\":6}, " + + "{\"column0\":7}" + + "]", + dstVector.toString()); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java index 99e22f8bdcd5c..bc8aac08b61e4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -28,9 +27,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link IndexSorter}. - */ +/** Test cases for {@link IndexSorter}. */ public class TestIndexSorter { private BufferAllocator allocator; @@ -56,14 +53,15 @@ public void testIndexSort() { // sort the index IndexSorter indexSorter = new IndexSorter<>(); - DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator(); + DefaultVectorComparators.IntComparator intComparator = + new DefaultVectorComparators.IntComparator(); intComparator.attachVector(vec); IntVector indices = new IntVector("", allocator); indices.setValueCount(10); indexSorter.sort(vec, indices, intComparator); - int[] expected = new int[]{6, 9, 1, 3, 0, 4, 5, 7, 2, 8}; + int[] expected = new int[] {6, 9, 1, 3, 0, 4, 5, 7, 2, 8}; for (int i = 0; i < expected.length; i++) { assertTrue(!indices.isNull(i)); @@ -74,8 +72,8 @@ public void testIndexSort() { } /** - * Tests the worst case for quick sort. - * It may cause stack overflow if the algorithm is implemented as a recursive algorithm. + * Tests the worst case for quick sort. It may cause stack overflow if the algorithm is + * implemented as a recursive algorithm. 
*/ @Test public void testSortLargeIncreasingInt() { @@ -91,7 +89,8 @@ public void testSortLargeIncreasingInt() { // sort the vector IndexSorter indexSorter = new IndexSorter<>(); - DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator(); + DefaultVectorComparators.IntComparator intComparator = + new DefaultVectorComparators.IntComparator(); intComparator.attachVector(vec); try (IntVector indices = new IntVector("", allocator)) { @@ -110,7 +109,7 @@ public void testSortLargeIncreasingInt() { public void testChoosePivot() { final int vectorLength = 100; try (IntVector vec = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { vec.allocateNew(vectorLength); indices.allocateNew(vectorLength); @@ -122,7 +121,8 @@ public void testChoosePivot() { vec.setValueCount(vectorLength); indices.setValueCount(vectorLength); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); // setup internal data structures comparator.attachVector(vec); @@ -147,17 +147,16 @@ public void testChoosePivot() { } } - /** - * Evaluates choosing pivot for all possible permutations of 3 numbers. - */ + /** Evaluates choosing pivot for all possible permutations of 3 numbers. */ @Test public void testChoosePivotAllPermutes() { try (IntVector vec = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { vec.allocateNew(); indices.allocateNew(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); // setup internal data structures comparator.attachVector(vec); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java index ba9c42913c0d9..3b16ac30d4ff4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertFalse; @@ -28,9 +27,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link InsertionSorter}. - */ +/** Test cases for {@link InsertionSorter}. 
*/ public class TestInsertionSorter { private BufferAllocator allocator; @@ -49,7 +46,7 @@ public void shutdown() { private void testSortIntVectorRange(int start, int end, int[] expected) { try (IntVector vector = new IntVector("vector", allocator); - IntVector buffer = new IntVector("buffer", allocator)) { + IntVector buffer = new IntVector("buffer", allocator)) { buffer.allocateNew(1); @@ -81,7 +78,7 @@ public void testSortIntVector() { private void testSortIndicesRange(int start, int end, int[] expectedIndices) { try (IntVector vector = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { ValueVectorDataPopulator.setVector(vector, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); ValueVectorDataPopulator.setVector(indices, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java index 321ca226d7e1d..025576f08e248 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static junit.framework.TestCase.assertEquals; @@ -26,9 +25,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link OffHeapIntStack}. - */ +/** Test cases for {@link OffHeapIntStack}. */ public class TestOffHeapIntStack { private BufferAllocator allocator; diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java index 66b75cbccac3e..4f6a8489c43ea 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java @@ -14,19 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.ArrayList; import java.util.Collection; import java.util.List; - import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for out-of-place sorters. - */ +/** Test cases for out-of-place sorters. */ @RunWith(Parameterized.class) public abstract class TestOutOfPlaceVectorSorter { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java index e22b22d4e6757..24b2c752d0863 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -25,7 +24,6 @@ import java.util.Random; import java.util.function.BiConsumer; import java.util.function.Supplier; - import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; @@ -37,50 +35,59 @@ import org.apache.arrow.vector.testing.RandomDataGenerator; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -/** - * Utilities for sorting related utilities. - */ +/** Utilities related to sorting. */ public class TestSortingUtil { static final Random random = new Random(0); - static final DataGenerator TINY_INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.TINY_INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Byte.class); - - static final DataGenerator SMALL_INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.SMALL_INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Short.class); - - static final DataGenerator INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Integer.class); - - static final DataGenerator LONG_GENERATOR = new DataGenerator<>( - RandomDataGenerator.LONG_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Long.class); - - static final DataGenerator FLOAT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.FLOAT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Float.class); - - static final DataGenerator DOUBLE_GENERATOR = new DataGenerator<>( - RandomDataGenerator.DOUBLE_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Double.class); - - static final DataGenerator STRING_GENERATOR = new DataGenerator<>( - () -> { - int strLength = random.nextInt(20) + 1; - return generateRandomString(strLength); - }, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), String.class); - - private TestSortingUtil() { - } - - /** - * Verify that a vector is equal to an array. 
- */ + static final DataGenerator TINY_INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.TINY_INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Byte.class); + + static final DataGenerator SMALL_INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.SMALL_INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Short.class); + + static final DataGenerator INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Integer.class); + + static final DataGenerator LONG_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.LONG_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Long.class); + + static final DataGenerator FLOAT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.FLOAT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Float.class); + + static final DataGenerator DOUBLE_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.DOUBLE_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Double.class); + + static final DataGenerator STRING_GENERATOR = + new DataGenerator<>( + () -> { + int strLength = random.nextInt(20) + 1; + return generateRandomString(strLength); + }, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + String.class); + + private TestSortingUtil() {} + + /** Verify that a vector is equal to an array. */ public static void verifyResults(V vector, U[] expected) { assertEquals(vector.getValueCount(), expected.length); for (int i = 0; i < expected.length; i++) { @@ -88,30 +95,28 @@ public static void verifyResults(V vector, U[] expect } } - /** - * Sort an array with null values come first. - */ + /** Sort an array with null values coming first. */ public static > void sortArray(U[] array) { - Arrays.sort(array, (a, b) -> { - if (a == null || b == null) { - if (a == null && b == null) { - return 0; - } - - // exactly one is null - if (a == null) { - return -1; - } else { - return 1; - } - } - return a.compareTo(b); - }); + Arrays.sort( + array, + (a, b) -> { + if (a == null || b == null) { + if (a == null && b == null) { + return 0; + } + + // exactly one is null + if (a == null) { + return -1; + } else { + return 1; + } + } + return a.compareTo(b); + }); } - /** - * Generate a string with alphabetic characters only. - */ + /** Generate a string with alphabetic characters only. */ static String generateRandomString(int length) { byte[] str = new byte[length]; final int lower = 'a'; @@ -128,6 +133,7 @@ static String generateRandomString(int length) { /** * Utility to generate data for testing. + * * @param vector type. * @param data element type. */ @@ -139,8 +145,7 @@ static class DataGenerator> { final Class clazz; - DataGenerator( - Supplier dataGenerator, BiConsumer vectorPopulator, Class clazz) { + DataGenerator(Supplier dataGenerator, BiConsumer vectorPopulator, Class clazz) { this.dataGenerator = dataGenerator; this.vectorPopulator = vectorPopulator; this.clazz = clazz; @@ -148,6 +153,7 @@ static class DataGenerator> { /** * Populate the vector according to the specified parameters. + * * @param vector the vector to populate. * @param length vector length. * @param nullFraction the fraction of null values. 
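[Note on the DataGenerator utility reformatted above: populate() fills a vector of the given length while turning roughly nullFraction of the slots into nulls. A minimal sketch of that idea follows; it is illustrative only and not part of the patch, and the class name NullFractionSketch and its method are invented for the example.]

    import java.util.Random;
    import java.util.function.Supplier;

    final class NullFractionSketch {
      private static final Random RANDOM = new Random(0);

      // Fill `out` with generated values, replacing each slot with null
      // with probability `nullFraction`; the sorting tests then copy such
      // an array into the vector under test and compare sorted results.
      static <U> U[] fill(Supplier<U> dataGenerator, U[] out, double nullFraction) {
        for (int i = 0; i < out.length; i++) {
          out[i] = RANDOM.nextDouble() < nullFraction ? null : dataGenerator.get();
        }
        return out;
      }
    }

The expected ordering is then produced by sortArray above, which places nulls before all non-null values, matching the behavior of the default vector comparators.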
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java index f2de5d23fce89..ce15940c1df3d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VarCharVector; @@ -31,9 +29,7 @@ import org.junit.Test; import org.junit.jupiter.api.Assertions; -/** - * Test cases for {@link StableVectorComparator}. - */ +/** Test cases for {@link StableVectorComparator}. */ public class TestStableVectorComparator { private BufferAllocator allocator; @@ -62,7 +58,8 @@ public void testCompare() { vec.set(4, "a".getBytes(StandardCharsets.UTF_8)); VectorValueComparator comparator = new TestVarCharSorter(); - VectorValueComparator stableComparator = new StableVectorComparator<>(comparator); + VectorValueComparator stableComparator = + new StableVectorComparator<>(comparator); stableComparator.attachVector(vec); assertTrue(stableComparator.compare(0, 1) > 0); @@ -95,10 +92,12 @@ public void testStableSortString() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); VectorValueComparator comparator = new TestVarCharSorter(); - VectorValueComparator stableComparator = new StableVectorComparator<>(comparator); + VectorValueComparator stableComparator = + new StableVectorComparator<>(comparator); try (VarCharVector sortedVec = - (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { + (VarCharVector) + vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount()); sortedVec.setLastSet(vec.getValueCount() - 1); sortedVec.setValueCount(vec.getValueCount()); @@ -107,23 +106,32 @@ public void testStableSortString() { // verify results // the results are stable - assertEquals("0", new String(Objects.requireNonNull(sortedVec.get(0)), StandardCharsets.UTF_8)); - assertEquals("01", new String(Objects.requireNonNull(sortedVec.get(1)), StandardCharsets.UTF_8)); - assertEquals("0c", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); - assertEquals("a", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); - assertEquals("aa", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); - assertEquals("a1", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); - assertEquals("abcdefg", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); - assertEquals("accc", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); - assertEquals("afds", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); + assertEquals( + "0", new String(Objects.requireNonNull(sortedVec.get(0)), 
StandardCharsets.UTF_8)); + assertEquals( + "01", new String(Objects.requireNonNull(sortedVec.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "0c", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "a", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "aa", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "a1", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "abcdefg", + new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); + assertEquals( + "accc", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); + assertEquals( + "afds", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); } } } - /** - * Utility comparator that compares varchars by the first character. - */ + /** Utility comparator that compares varchars by the first character. */ private static class TestVarCharSorter extends VectorValueComparator { @Override diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java index 2486034f1fa32..b3f2539fa53c2 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseVariableWidthVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link VariableWidthOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link VariableWidthOutOfPlaceVectorSorter}. */ public class TestVariableWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -44,10 +40,11 @@ public TestVariableWidthOutOfPlaceVectorSorter(boolean generalSorter) { } OutOfPlaceVectorSorter getSorter() { - return generalSorter ? new GeneralOutOfPlaceVectorSorter<>() : new VariableWidthOutOfPlaceVectorSorter(); + return generalSorter + ? 
new GeneralOutOfPlaceVectorSorter<>() + : new VariableWidthOutOfPlaceVectorSorter(); } - @Before public void prepare() { allocator = new RootAllocator(1024 * 1024); @@ -79,10 +76,10 @@ public void testSortString() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); VarCharVector sortedVec = - (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount()); sortedVec.setLastSet(vec.getValueCount() - 1); sortedVec.setValueCount(vec.getValueCount()); @@ -96,14 +93,23 @@ public void testSortString() { assertTrue(sortedVec.isNull(0)); assertTrue(sortedVec.isNull(1)); - assertEquals("12", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); - assertEquals("yes", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); + assertEquals( + "yes", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); sortedVec.close(); } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java index 7951c39d550d2..5c37ddf9284e4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertArrayEquals; @@ -28,7 +27,6 @@ import java.util.Comparator; import java.util.List; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseVariableWidthVector; @@ -41,9 +39,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test sorting variable width vectors with random data. - */ +/** Test sorting variable width vectors with random data. */ @RunWith(Parameterized.class) public class TestVariableWidthSorting> { @@ -72,8 +68,11 @@ public void shutdown() { } public TestVariableWidthSorting( - int length, double nullFraction, String desc, - Function vectorGenerator, TestSortingUtil.DataGenerator dataGenerator) { + int length, + double nullFraction, + String desc, + Function vectorGenerator, + TestSortingUtil.DataGenerator dataGenerator) { this.length = length; this.nullFraction = nullFraction; this.vectorGenerator = vectorGenerator; @@ -92,9 +91,11 @@ void sortOutOfPlace() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); - try (V sortedVec = (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (V sortedVec = + (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { int dataSize = vector.getOffsetBuffer().getInt(vector.getValueCount() * 4L); sortedVec.allocateNew(dataSize, vector.getValueCount()); sortedVec.setValueCount(vector.getValueCount()); @@ -112,33 +113,36 @@ public static Collection getParameters() { List params = new ArrayList<>(); for (int length : VECTOR_LENGTHS) { for (double nullFrac : NULL_FRACTIONS) { - params.add(new Object[]{ - length, nullFrac, "VarCharVector", - (Function) allocator -> new VarCharVector("vector", allocator), - TestSortingUtil.STRING_GENERATOR - }); + params.add( + new Object[] { + length, + nullFrac, + "VarCharVector", + (Function) + allocator -> new VarCharVector("vector", allocator), + TestSortingUtil.STRING_GENERATOR + }); } } return params; } - /** - * Verify results as byte arrays. - */ + /** Verify results as byte arrays. */ public static void verifyResults(V vector, String[] expected) { assertEquals(vector.getValueCount(), expected.length); for (int i = 0; i < expected.length; i++) { if (expected[i] == null) { assertTrue(vector.isNull(i)); } else { - assertArrayEquals(((Text) vector.getObject(i)).getBytes(), expected[i].getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + ((Text) vector.getObject(i)).getBytes(), expected[i].getBytes(StandardCharsets.UTF_8)); } } } /** - * String comparator with the same behavior as that of - * {@link DefaultVectorComparators.VariableWidthComparator}. + * String comparator with the same behavior as that of {@link + * DefaultVectorComparators.VariableWidthComparator}. 
*/ static class StringComparator implements Comparator { diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 12b9950ad80fc..77aed2d0f6a37 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 18 + 31 org.apache.arrow @@ -27,6 +27,19 @@ + + 1.8 + 1.8 + 3.12.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.4 + 3.2.2 + 3.6.3 + 3.5.0 @@ -138,11 +151,9 @@ ${project.version} - - @@ -156,12 +167,10 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 com.diffplug.spotless @@ -188,13 +197,34 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 + + + + apache-release + + + + org.apache.maven.plugins + maven-assembly-plugin + + + source-release-assembly + + + true + + + + + + + + diff --git a/java/c/pom.xml b/java/c/pom.xml index 1095e99bbdd3f..afb6e0cd8b890 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -50,7 +50,7 @@ org.immutables - value + value-annotations org.apache.arrow @@ -83,5 +83,4 @@ - diff --git a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java index cd2a464f4fa17..99873dadad242 100644 --- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java +++ b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java @@ -53,6 +53,7 @@ import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.util.DataSizeRoundingUtil; /** @@ -209,6 +210,11 @@ public List visit(ArrowType.Utf8 type) { } } + @Override + public List visit(ArrowType.Utf8View type) { + throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported"); + } + @Override public List visit(ArrowType.LargeUtf8 type) { try (ArrowBuf offsets = importOffsets(type, LargeVarCharVector.OFFSET_WIDTH)) { @@ -237,6 +243,11 @@ public List visit(ArrowType.Binary type) { } } + @Override + public List visit(ArrowType.BinaryView type) { + throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported"); + } + @Override public List visit(ArrowType.LargeBinary type) { try (ArrowBuf offsets = importOffsets(type, LargeVarBinaryVector.OFFSET_WIDTH)) { @@ -318,4 +329,9 @@ public List visit(ArrowType.Interval type) { public List visit(ArrowType.Duration type) { return Arrays.asList(maybeImportBitmap(type), importFixedBytes(type, 1, DurationVector.TYPE_WIDTH)); } + + @Override + public List visit(ListView type) { + throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported"); + } } diff --git a/java/c/src/main/java/org/apache/arrow/c/Format.java b/java/c/src/main/java/org/apache/arrow/c/Format.java index 2875e46f749c4..a5f44859e8327 100644 --- a/java/c/src/main/java/org/apache/arrow/c/Format.java +++ b/java/c/src/main/java/org/apache/arrow/c/Format.java @@ -18,6 +18,7 @@ package org.apache.arrow.c; import java.util.Arrays; +import java.util.Locale; import java.util.stream.Collectors; import org.apache.arrow.util.Preconditions; @@ -127,7 +128,7 @@ static String asString(ArrowType arrowType) { String.format("Int type with bitwidth %d is unsupported", type.getBitWidth())); } if (type.getIsSigned()) { - format = format.toLowerCase(); + format = 
format.toLowerCase(Locale.ROOT); } return format; } diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java index d9afd0189d807..1b0c59163a187 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java @@ -27,6 +27,7 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Collections2; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.complex.StructVector; @@ -54,7 +55,14 @@ public class StructVectorLoader { /** * Construct with a schema. - * + *

    + * The schema referred to here can be obtained from the struct vector. + * The schema here should be the children of a struct vector, not a schema + * containing the struct field itself. + * For example: + * + * Schema schema = new Schema(structVector.getField().getChildren()); + * * @param schema buffers are added based on schema. */ public StructVectorLoader(Schema schema) { @@ -90,22 +98,36 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch .fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; + Iterator variadicBufferCounts = Collections.emptyIterator(); + if (recordBatch.getVariadicBufferCounts() != null && !recordBatch.getVariadicBufferCounts().isEmpty()) { + variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); + } for (FieldVector fieldVector : result.getChildrenFromFields()) { - loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec); + loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); } result.loadFieldBuffers(new ArrowFieldNode(recordBatch.getLength(), 0), Collections.singletonList(null)); - if (nodes.hasNext() || buffers.hasNext()) { - throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: " + - Collections2.toList(nodes).toString() + " buffers: " + Collections2.toList(buffers).toString()); + if (nodes.hasNext() || buffers.hasNext() || variadicBufferCounts.hasNext()) { + throw new IllegalArgumentException("not all nodes, buffers and variadicBufferCounts were consumed. nodes: " + + Collections2.toString(nodes) + " buffers: " + Collections2.toString(buffers) + " variadicBufferCounts: " + + Collections2.toString(variadicBufferCounts)); } return result; } private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes, - CompressionCodec codec) { + CompressionCodec codec, Iterator variadicBufferCounts) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); + // variadicBufferLayoutCount will be 0 for vectors of a type except BaseVariableWidthViewVector + long variadicBufferLayoutCount = 0; + if (vector instanceof BaseVariableWidthViewVector) { + if (variadicBufferCounts.hasNext()) { + variadicBufferLayoutCount = variadicBufferCounts.next(); + } else { + throw new IllegalStateException("No variadicBufferCounts available for BaseVariableWidthViewVector"); + } + } + int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); List ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); @@ -138,7 +160,7 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf for (int i = 0; i < childrenFromFields.size(); i++) { Field child = children.get(i); FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes, codec); + loadBuffers(fieldVector, child, buffers, nodes, codec, variadicBufferCounts); } } } diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java index aa6d9b4d0f6a7..82539acf6f292 100644 --- 
a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.complex.StructVector; @@ -87,17 +88,31 @@ public StructVectorUnloader(StructVector root, boolean includeNullCount, Compres public ArrowRecordBatch getRecordBatch() { List nodes = new ArrayList<>(); List buffers = new ArrayList<>(); + List variadicBufferCounts = new ArrayList<>(); for (FieldVector vector : root.getChildrenFromFields()) { - appendNodes(vector, nodes, buffers); + appendNodes(vector, nodes, buffers, variadicBufferCounts); } return new ArrowRecordBatch(root.getValueCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), - alignBuffers); + variadicBufferCounts, alignBuffers); } - private void appendNodes(FieldVector vector, List nodes, List buffers) { + private long getVariadicBufferCount(FieldVector vector) { + if (vector instanceof BaseVariableWidthViewVector) { + return ((BaseVariableWidthViewVector) vector).getDataBuffers().size(); + } + return 0L; + } + + private void appendNodes(FieldVector vector, List nodes, List buffers, + List variadicBufferCounts) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); + long variadicBufferCount = getVariadicBufferCount(vector); + int expectedBufferCount = (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); + // only update variadicBufferCounts for vectors that have variadic buffers + if (variadicBufferCount > 0) { + variadicBufferCounts.add(variadicBufferCount); + } if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format("wrong number of buffers for field %s in vector %s. 
found: %s", vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); @@ -106,7 +121,7 @@ private void appendNodes(FieldVector vector, List nodes, List org.immutables - value + value-annotations org.apache.commons commons-compress - 1.26.0 + 1.26.2 com.github.luben zstd-jni - 1.5.5-11 + 1.5.6-3 diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java index af28333746290..24d6abf3cb7c3 100644 --- a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java +++ b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java @@ -17,6 +17,11 @@ package org.apache.arrow.compression; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.channels.Channels; @@ -46,9 +51,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.junit.After; -import org.junit.Assert; -import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -67,7 +70,7 @@ public void setup() { root = null; } - @After + @AfterEach public void tearDown() { if (root != null) { root.close(); @@ -134,19 +137,19 @@ public void testArrowFileZstdRoundTrip() throws Exception { try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Assertions.assertTrue(reader.loadNextBatch()); - Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assertions.assertFalse(reader.loadNextBatch()); + assertEquals(1, reader.getRecordBlocks().size()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + assertEquals(1, reader.getRecordBlocks().size()); + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage()); } } @@ -158,17 +161,17 @@ public void testArrowStreamZstdRoundTrip() throws Exception { try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - Assert.assertTrue(reader.loadNextBatch()); - Assert.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assert.assertFalse(reader.loadNextBatch()); + assertTrue(reader.loadNextBatch()); + 
assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assert.assertEquals( + assertEquals( "Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage() ); @@ -189,19 +192,19 @@ public void testArrowFileZstdRoundTripWithDictionary() throws Exception { try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Assertions.assertTrue(reader.loadNextBatch()); - Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assertions.assertFalse(reader.loadNextBatch()); + assertEquals(1, reader.getRecordBlocks().size()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + assertEquals(1, reader.getRecordBlocks().size()); + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage()); } dictionaryVector.close(); @@ -221,17 +224,17 @@ public void testArrowStreamZstdRoundTripWithDictionary() throws Exception { try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - Assertions.assertTrue(reader.loadNextBatch()); - Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assertions.assertFalse(reader.loadNextBatch()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage()); } dictionaryVector.close(); diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index 62ea79f55ccd4..3dea16204a4db 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -23,7 +23,6 @@ Java implementation of Arrow Dataset API/Framework ../../../cpp/release-build/ - 2.5.0 1.13.1 1.11.3 @@ -47,7 +46,7 @@ org.immutables - value + value-annotations org.apache.arrow @@ -195,21 +194,14 @@ 
jdk11+ [11,] - - !m2e.version - org.apache.maven.plugins maven-surefire-plugin - - false - - ${project.basedir}/../../testing/data - - --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc index 19a43c8d2fa41..79efbeb74fc54 100644 --- a/java/dataset/src/main/cpp/jni_wrapper.cc +++ b/java/dataset/src/main/cpp/jni_wrapper.cc @@ -25,9 +25,8 @@ #include "arrow/c/helpers.h" #include "arrow/dataset/api.h" #include "arrow/dataset/file_base.h" -#include "arrow/filesystem/localfs.h" +#include "arrow/filesystem/api.h" #include "arrow/filesystem/path_util.h" -#include "arrow/filesystem/s3fs.h" #include "arrow/engine/substrait/util.h" #include "arrow/engine/substrait/serde.h" #include "arrow/engine/substrait/relation.h" @@ -660,7 +659,9 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_releaseBuffe JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Finalized( JNIEnv* env, jobject) { JNI_METHOD_START +#ifdef ARROW_S3 JniAssertOkOrThrow(arrow::fs::EnsureS3Finalized()); +#endif JNI_METHOD_END() } diff --git a/java/dev/checkstyle/checkstyle-spotless.xml b/java/dev/checkstyle/checkstyle-spotless.xml new file mode 100644 index 0000000000000..a2e9a60b12c72 --- /dev/null +++ b/java/dev/checkstyle/checkstyle-spotless.xml @@ -0,0 +1,286 @@ [286 added lines of Checkstyle XML configuration; the markup was not preserved in this extract] diff --git a/java/dev/checkstyle/checkstyle.license b/java/dev/license/asf-java.license similarity index 100% rename from java/dev/checkstyle/checkstyle.license rename to java/dev/license/asf-java.license diff --git a/java/dev/license/asf-xml.license b/java/dev/license/asf-xml.license new file mode 100644 index 0000000000000..a43b97bca8f0f --- /dev/null +++ b/java/dev/license/asf-xml.license @@ -0,0 +1,11 @@ [11 added lines: the ASF license header in XML comment form; markup not preserved in this extract] \ No newline at end of file diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 897af0b9e1129..f2070d4ff7cba 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -15,13 +15,12 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-core jar Arrow Flight Core - (Experimental)An RPC mechanism for transferring ValueVectors. + An RPC mechanism for transferring ValueVectors. 
1 @@ -119,13 +118,13 @@ org.immutables - value + value-annotations com.google.api.grpc proto-google-common-protos - 2.37.1 + 2.39.1 test @@ -151,13 +150,6 @@ org.apache.maven.plugins maven-shade-plugin - - 3.2.4 shade-main @@ -166,6 +158,7 @@ package + false true shaded @@ -192,6 +185,7 @@ package + false true shaded-ext @@ -244,7 +238,6 @@ org.apache.maven.plugins maven-dependency-plugin - 3.3.0 analyze @@ -264,7 +257,6 @@ org.codehaus.mojo build-helper-maven-plugin - 1.9.1 add-generated-sources-to-classpath @@ -282,7 +274,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies @@ -299,13 +290,6 @@ - - - kr.motd.maven - os-maven-plugin - 1.7.1 - - @@ -313,18 +297,14 @@ jdk11+ [11,] - - !m2e.version - org.apache.maven.plugins maven-surefire-plugin - - --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - false + + --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED ${project.basedir}/../../../testing/data @@ -334,5 +314,4 @@ - diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java index 991d0ed6a043b..8fc2002207e24 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java @@ -49,6 +49,7 @@ public class CallStatus { public static final CallStatus UNAUTHORIZED = FlightStatusCode.UNAUTHORIZED.toStatus(); public static final CallStatus UNIMPLEMENTED = FlightStatusCode.UNIMPLEMENTED.toStatus(); public static final CallStatus UNAVAILABLE = FlightStatusCode.UNAVAILABLE.toStatus(); + public static final CallStatus RESOURCE_EXHAUSTED = FlightStatusCode.RESOURCE_EXHAUSTED.toStatus(); /** * Create a new status. diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java index 3d96877ba02de..09a2c7afda106 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java @@ -71,6 +71,11 @@ public enum FlightStatusCode { * should send this code only if it has not done any work. */ UNAVAILABLE, + /** + * Some resource has been exhausted, perhaps a per-user quota, or perhaps the entire file system is out of space. + * (see: https://grpc.github.io/grpc/core/md_doc_statuscodes.html) + */ + RESOURCE_EXHAUSTED ; /** diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java index 7091caa5e98bc..af22cd8aade22 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java @@ -26,8 +26,6 @@ /** * Middleware for handling Flight SQL Sessions including session cookie handling. - * - * Currently experimental. 
*/ public class ServerSessionMiddleware implements FlightServerMiddleware { Factory factory; diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java index 7f0dcf2da3f0d..a2d9a85aaa442 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java @@ -74,6 +74,8 @@ public static Status.Code toGrpcStatusCode(FlightStatusCode code) { return Code.UNIMPLEMENTED; case UNAVAILABLE: return Code.UNAVAILABLE; + case RESOURCE_EXHAUSTED: + return Code.RESOURCE_EXHAUSTED; default: return Code.UNKNOWN; } @@ -101,7 +103,7 @@ public static FlightStatusCode fromGrpcStatusCode(Status.Code code) { case PERMISSION_DENIED: return FlightStatusCode.UNAUTHORIZED; case RESOURCE_EXHAUSTED: - return FlightStatusCode.INVALID_ARGUMENT; + return FlightStatusCode.RESOURCE_EXHAUSTED; case FAILED_PRECONDITION: return FlightStatusCode.INVALID_ARGUMENT; case ABORTED: diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java index 9912a26ea340a..730ea30a2f598 100644 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java +++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java @@ -48,4 +48,26 @@ public void testParseTrailers() { Assertions.assertTrue(callStatus.metadata().containsKey("content-type")); Assertions.assertEquals("text/html", callStatus.metadata().get("content-type")); } + + @Test + public void testGrpcResourceExhaustedTranslatedToFlightStatus() { + Status status = Status.RESOURCE_EXHAUSTED; + + CallStatus callStatus = StatusUtils.fromGrpcStatus(status); + Assertions.assertEquals(FlightStatusCode.RESOURCE_EXHAUSTED, callStatus.code()); + + FlightStatusCode flightStatusCode = StatusUtils.fromGrpcStatusCode(status.getCode()); + Assertions.assertEquals(FlightStatusCode.RESOURCE_EXHAUSTED, flightStatusCode); + } + + @Test + public void testFlightResourceExhaustedTranslatedToGrpcStatus() { + CallStatus callStatus = CallStatus.RESOURCE_EXHAUSTED; + + Status.Code grpcStatusCode = StatusUtils.toGrpcStatusCode(callStatus.code()); + Assertions.assertEquals(Status.RESOURCE_EXHAUSTED.getCode(), grpcStatusCode); + + Status grpcStatus = StatusUtils.toGrpcStatus(callStatus); + Assertions.assertEquals(Status.RESOURCE_EXHAUSTED.getCode(), grpcStatus.getCode()); + } } diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index 74016d81e91e5..cd2c28ba8959f 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-integration-tests @@ -63,7 +62,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 36da335b37b9a..50d7b2617a5a9 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql-jdbc-core @@ -47,20 +46,17 @@ - org.apache.arrow arrow-memory-core - org.apache.arrow arrow-memory-netty runtime - org.apache.arrow arrow-vector @@ -125,22 +121,17 @@ 
org.apache.calcite.avatica avatica - 1.24.0 + 1.25.0 org.bouncycastle - bcpkix-jdk15on - 1.70 + bcpkix-jdk18on + 1.78.1 - - - src/main/resources - - maven-surefire-plugin @@ -154,7 +145,6 @@ org.codehaus.mojo properties-maven-plugin - 1.2.1 write-project-properties-to-file diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryViewAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryViewAvaticaParameterConverter.java new file mode 100644 index 0000000000000..dfd4727014292 --- /dev/null +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryViewAvaticaParameterConverter.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.driver.jdbc.converter.impl; + +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.calcite.avatica.AvaticaParameter; +import org.apache.calcite.avatica.remote.TypedValue; + +/** AvaticaParameterConverter for BinaryView Arrow types. */ +public class BinaryViewAvaticaParameterConverter extends BaseAvaticaParameterConverter { + + public BinaryViewAvaticaParameterConverter(ArrowType.BinaryView type) { + + } + + @Override + public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public AvaticaParameter createParameter(Field field) { + return createParameter(field, false); + } +} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8ViewAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8ViewAvaticaParameterConverter.java new file mode 100644 index 0000000000000..2c826aefb9c1c --- /dev/null +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8ViewAvaticaParameterConverter.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.driver.jdbc.converter.impl; + +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.calcite.avatica.AvaticaParameter; +import org.apache.calcite.avatica.remote.TypedValue; + +/** + * AvaticaParameterConverter for Utf8View Arrow types. + */ +public class Utf8ViewAvaticaParameterConverter extends BaseAvaticaParameterConverter { + + public Utf8ViewAvaticaParameterConverter(ArrowType.Utf8View type) { + } + + @Override + public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { + throw new UnsupportedOperationException("Utf8View not supported"); + } + + @Override + public AvaticaParameter createParameter(Field field) { + return createParameter(field, false); + } +} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java index b2bd8e745ecca..70a58ff440ed4 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java @@ -190,6 +190,11 @@ public Boolean visit(ArrowType.Utf8 type) { return new Utf8AvaticaParameterConverter(type).bindParameter(vector, typedValue, index); } + @Override + public Boolean visit(ArrowType.Utf8View type) { + throw new UnsupportedOperationException("Utf8View is unsupported"); + } + @Override public Boolean visit(ArrowType.LargeUtf8 type) { return new LargeUtf8AvaticaParameterConverter(type).bindParameter(vector, typedValue, index); @@ -200,6 +205,11 @@ public Boolean visit(ArrowType.Binary type) { return new BinaryAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); } + @Override + public Boolean visit(ArrowType.BinaryView type) { + throw new UnsupportedOperationException("BinaryView is unsupported"); + } + @Override public Boolean visit(ArrowType.LargeBinary type) { return new LargeBinaryAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); @@ -244,6 +254,11 @@ public Boolean visit(ArrowType.Interval type) { public Boolean visit(ArrowType.Duration type) { return new DurationAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); } + + @Override + public Boolean visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("Binding is not yet supported for type " + type); + } } } diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java index 843fe0cb89d9f..6ec33fafcfa46 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java @@ -23,6 +23,7 @@ import 
java.util.stream.Stream; import org.apache.arrow.driver.jdbc.converter.impl.BinaryAvaticaParameterConverter; +import org.apache.arrow.driver.jdbc.converter.impl.BinaryViewAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.BoolAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.DateAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.DecimalAvaticaParameterConverter; @@ -43,6 +44,7 @@ import org.apache.arrow.driver.jdbc.converter.impl.TimestampAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.UnionAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.Utf8AvaticaParameterConverter; +import org.apache.arrow.driver.jdbc.converter.impl.Utf8ViewAvaticaParameterConverter; import org.apache.arrow.flight.sql.FlightSqlColumnMetadata; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; @@ -208,6 +210,11 @@ public AvaticaParameter visit(ArrowType.Utf8 type) { return new Utf8AvaticaParameterConverter(type).createParameter(field); } + @Override + public AvaticaParameter visit(ArrowType.Utf8View type) { + return new Utf8ViewAvaticaParameterConverter(type).createParameter(field); + } + @Override public AvaticaParameter visit(ArrowType.LargeUtf8 type) { return new LargeUtf8AvaticaParameterConverter(type).createParameter(field); @@ -218,6 +225,11 @@ public AvaticaParameter visit(ArrowType.Binary type) { return new BinaryAvaticaParameterConverter(type).createParameter(field); } + @Override + public AvaticaParameter visit(ArrowType.BinaryView type) { + return new BinaryViewAvaticaParameterConverter(type).createParameter(field); + } + @Override public AvaticaParameter visit(ArrowType.LargeBinary type) { return new LargeBinaryAvaticaParameterConverter(type).createParameter(field); @@ -262,6 +274,11 @@ public AvaticaParameter visit(ArrowType.Interval type) { public AvaticaParameter visit(ArrowType.Duration type) { return new DurationAvaticaParameterConverter(type).createParameter(field); } + + @Override + public AvaticaParameter visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("AvaticaParameter not yet supported for type " + type); + } } } diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index b3afbe1defdba..4456270e7b347 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql-jdbc-driver diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index 7ed217db68b07..14fde34c3b4f3 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -15,13 +15,12 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql jar Arrow Flight SQL - (Experimental)Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight + Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight 1 @@ -52,7 +51,7 @@ org.immutables - value + value-annotations org.apache.arrow @@ -119,9 +118,6 @@ jdk11+ [11,] - - !m2e.version - @@ -136,5 +132,4 @@ - diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java index 6fe31fae9216b..a94dc563cfbcc 100644 --- 
a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java +++ b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java @@ -78,6 +78,7 @@ import org.apache.arrow.flight.SetSessionOptionsResult; import org.apache.arrow.flight.SyncPutListener; import org.apache.arrow.flight.Ticket; +import org.apache.arrow.flight.sql.impl.FlightSql; import org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementResult; import org.apache.arrow.flight.sql.impl.FlightSql.CommandPreparedStatementQuery; import org.apache.arrow.flight.sql.util.TableRef; @@ -1048,15 +1049,35 @@ private Schema deserializeSchema(final ByteString bytes) { public FlightInfo execute(final CallOption... options) { checkOpen(); - final FlightDescriptor descriptor = FlightDescriptor + FlightDescriptor descriptor = FlightDescriptor .command(Any.pack(CommandPreparedStatementQuery.newBuilder() .setPreparedStatementHandle(preparedStatementResult.getPreparedStatementHandle()) .build()) .toByteArray()); if (parameterBindingRoot != null && parameterBindingRoot.getRowCount() > 0) { - try (final SyncPutListener listener = putParameters(descriptor, options)) { - listener.getResult(); + try (final SyncPutListener putListener = putParameters(descriptor, options)) { + if (getParameterSchema().getFields().size() > 0 && + parameterBindingRoot != null && + parameterBindingRoot.getRowCount() > 0) { + final PutResult read = putListener.read(); + if (read != null) { + try (final ArrowBuf metadata = read.getApplicationMetadata()) { + final FlightSql.DoPutPreparedStatementResult doPutPreparedStatementResult = + FlightSql.DoPutPreparedStatementResult.parseFrom(metadata.nioBuffer()); + descriptor = FlightDescriptor + .command(Any.pack(CommandPreparedStatementQuery.newBuilder() + .setPreparedStatementHandle( + doPutPreparedStatementResult.getPreparedStatementHandle()) + .build()) + .toByteArray()); + } + } + } + } catch (final InterruptedException | ExecutionException e) { + throw CallStatus.CANCELLED.withCause(e).toRuntimeException(); + } catch (final InvalidProtocolBufferException e) { + throw CallStatus.INVALID_ARGUMENT.withCause(e).toRuntimeException(); } } diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/DoPutPreparedStatementResultPOJO.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/DoPutPreparedStatementResultPOJO.java new file mode 100644 index 0000000000000..ace78862b014d --- /dev/null +++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/DoPutPreparedStatementResultPOJO.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
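With the execute() change above, the DoPut acknowledgment may carry a DoPutPreparedStatementResult whose handle supersedes the one issued at CreatePreparedStatement time; this is what allows a server to remain stateless between binding and execution. A condensed sketch of the client-side handling (a hypothetical helper, not part of the patch; read() may return null when the server attaches no metadata):

    import com.google.protobuf.ByteString;
    import org.apache.arrow.flight.PutResult;
    import org.apache.arrow.flight.SyncPutListener;
    import org.apache.arrow.flight.sql.impl.FlightSql;
    import org.apache.arrow.memory.ArrowBuf;

    class HandleRefreshSketch {
      // Returns the server-supplied handle, or null if no metadata arrived.
      static ByteString readUpdatedHandle(SyncPutListener putListener) throws Exception {
        PutResult ack = putListener.read();
        if (ack == null) {
          return null;
        }
        try (ArrowBuf metadata = ack.getApplicationMetadata()) {
          return FlightSql.DoPutPreparedStatementResult.parseFrom(metadata.nioBuffer())
              .getPreparedStatementHandle();
        }
      }
    }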
+ */ + +package org.apache.arrow.flight.sql.example; + +import java.io.Serializable; + +public class DoPutPreparedStatementResultPOJO implements Serializable { + private String query; + private byte[] parameters; + + public DoPutPreparedStatementResultPOJO(String query, byte[] parameters) { + this.query = query; + this.parameters = parameters.clone(); + } + + public String getQuery() { + return query; + } + + public byte[] getParameters() { + return parameters; + } +} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java index 52c402efd6f0b..36362fd8681d3 100644 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java +++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java @@ -156,21 +156,22 @@ * supports all current features of Flight SQL. */ public class FlightSqlExample implements FlightSqlProducer, AutoCloseable { - private static final String DATABASE_URI = "jdbc:derby:target/derbyDB"; private static final Logger LOGGER = getLogger(FlightSqlExample.class); - private static final Calendar DEFAULT_CALENDAR = JdbcToArrowUtils.getUtcCalendar(); + protected static final Calendar DEFAULT_CALENDAR = JdbcToArrowUtils.getUtcCalendar(); + public static final String DB_NAME = "derbyDB"; + private final String databaseUri; // ARROW-15315: Use ExecutorService to simulate an async scenario private final ExecutorService executorService = Executors.newFixedThreadPool(10); private final Location location; - private final PoolingDataSource dataSource; - private final BufferAllocator rootAllocator = new RootAllocator(); + protected final PoolingDataSource dataSource; + protected final BufferAllocator rootAllocator = new RootAllocator(); private final Cache> preparedStatementLoadingCache; private final Cache> statementLoadingCache; private final SqlInfoBuilder sqlInfoBuilder; public static void main(String[] args) throws Exception { Location location = Location.forGrpcInsecure("localhost", 55555); - final FlightSqlExample example = new FlightSqlExample(location); + final FlightSqlExample example = new FlightSqlExample(location, DB_NAME); Location listenLocation = Location.forGrpcInsecure("0.0.0.0", 55555); try (final BufferAllocator allocator = new RootAllocator(); final FlightServer server = FlightServer.builder(allocator, listenLocation, example).build()) { @@ -179,13 +180,14 @@ public static void main(String[] args) throws Exception { } } - public FlightSqlExample(final Location location) { + public FlightSqlExample(final Location location, final String dbName) { // TODO Constructor should not be doing work. 
checkState( - removeDerbyDatabaseIfExists() && populateDerbyDatabase(), + removeDerbyDatabaseIfExists(dbName) && populateDerbyDatabase(dbName), "Failed to reset Derby database!"); + databaseUri = "jdbc:derby:target/" + dbName; final ConnectionFactory connectionFactory = - new DriverManagerConnectionFactory(DATABASE_URI, new Properties()); + new DriverManagerConnectionFactory(databaseUri, new Properties()); final PoolableConnectionFactory poolableConnectionFactory = new PoolableConnectionFactory(connectionFactory, null); final ObjectPool connectionPool = new GenericObjectPool<>(poolableConnectionFactory); @@ -248,9 +250,9 @@ public FlightSqlExample(final Location location) { } - private static boolean removeDerbyDatabaseIfExists() { + public static boolean removeDerbyDatabaseIfExists(final String dbName) { boolean wasSuccess; - final Path path = Paths.get("target" + File.separator + "derbyDB"); + final Path path = Paths.get("target" + File.separator + dbName); try (final Stream walk = Files.walk(path)) { /* @@ -262,7 +264,7 @@ private static boolean removeDerbyDatabaseIfExists() { * this not expected. */ wasSuccess = walk.sorted(Comparator.reverseOrder()).map(Path::toFile).map(File::delete) - .reduce(Boolean::logicalAnd).orElseThrow(IOException::new); + .reduce(Boolean::logicalAnd).orElseThrow(IOException::new); } catch (IOException e) { /* * The only acceptable scenario for an `IOException` to be thrown here is if @@ -277,9 +279,12 @@ private static boolean removeDerbyDatabaseIfExists() { return wasSuccess; } - private static boolean populateDerbyDatabase() { - try (final Connection connection = DriverManager.getConnection("jdbc:derby:target/derbyDB;create=true"); + private static boolean populateDerbyDatabase(final String dbName) { + try (final Connection connection = DriverManager.getConnection("jdbc:derby:target/" + dbName + ";create=true"); Statement statement = connection.createStatement()) { + + dropTable(statement, "intTable"); + dropTable(statement, "foreignTable"); statement.execute("CREATE TABLE foreignTable (" + "id INT not null primary key GENERATED ALWAYS AS IDENTITY (START WITH 1, INCREMENT BY 1), " + "foreignName varchar(100), " + @@ -302,6 +307,18 @@ private static boolean populateDerbyDatabase() { return true; } + private static void dropTable(final Statement statement, final String tableName) throws SQLException { + try { + statement.execute("DROP TABLE " + tableName); + } catch (SQLException e) { + // sql error code for "object does not exist"; which is fine, we're trying to delete the table + // see https://db.apache.org/derby/docs/10.17/ref/rrefexcept71493.html + if (!"42Y55".equals(e.getSQLState())) { + throw e; + } + } + } + private static ArrowType getArrowTypeFromJdbcType(final int jdbcDataType, final int precision, final int scale) { try { return JdbcToArrowUtils.getArrowTypeFromJdbcType(new JdbcFieldInfo(jdbcDataType, precision, scale), @@ -778,7 +795,7 @@ public void createPreparedStatement(final ActionCreatePreparedStatementRequest r // Running on another thread Future unused = executorService.submit(() -> { try { - final ByteString preparedStatementHandle = copyFrom(randomUUID().toString().getBytes(StandardCharsets.UTF_8)); + final ByteString preparedStatementHandle = copyFrom(request.getQuery().getBytes(StandardCharsets.UTF_8)); // Ownership of the connection will be passed to the context. Do NOT close! 
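A note on the Derby setup above: populateDerbyDatabase() now drops leftover tables before re-creating them, so a reused target/ directory and a fresh one initialize identically; SQLState 42Y55 ("object does not exist") is the one error dropTable() deliberately swallows. A standalone sketch of the pattern (the database and table names here are placeholders, not the patch's):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;
    import java.sql.Statement;

    class IdempotentDropSketch {
      public static void main(String[] args) throws SQLException {
        try (Connection conn = DriverManager.getConnection("jdbc:derby:target/demoDB;create=true");
             Statement stmt = conn.createStatement()) {
          try {
            stmt.execute("DROP TABLE demoTable"); // placeholder table
          } catch (SQLException e) {
            if (!"42Y55".equals(e.getSQLState())) { // anything but "missing table" is a real failure
              throw e;
            }
          }
          stmt.execute("CREATE TABLE demoTable (id INT)");
        }
      }
    }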
final Connection connection = dataSource.getConnection(); final PreparedStatement preparedStatement = connection.prepareStatement(request.getQuery(), @@ -882,7 +899,7 @@ public Runnable acceptPutPreparedStatementUpdate(CommandPreparedStatementUpdate while (binder.next()) { preparedStatement.addBatch(); } - int[] recordCounts = preparedStatement.executeBatch(); + final int[] recordCounts = preparedStatement.executeBatch(); recordCount = Arrays.stream(recordCounts).sum(); } @@ -928,6 +945,7 @@ public Runnable acceptPutPreparedStatementQuery(CommandPreparedStatementQuery co .toRuntimeException()); return; } + ackStream.onCompleted(); }; } @@ -1035,7 +1053,7 @@ public void getStreamTables(final CommandGetTables command, final CallContext co final String[] tableTypes = protocolSize == 0 ? null : protocolStringList.toArray(new String[protocolSize]); - try (final Connection connection = DriverManager.getConnection(DATABASE_URI); + try (final Connection connection = DriverManager.getConnection(databaseUri); final VectorSchemaRoot vectorSchemaRoot = getTablesRoot( connection.getMetaData(), rootAllocator, @@ -1086,7 +1104,7 @@ public void getStreamPrimaryKeys(final CommandGetPrimaryKeys command, final Call final String schema = command.hasDbSchema() ? command.getDbSchema() : null; final String table = command.getTable(); - try (Connection connection = DriverManager.getConnection(DATABASE_URI)) { + try (Connection connection = DriverManager.getConnection(databaseUri)) { final ResultSet primaryKeys = connection.getMetaData().getPrimaryKeys(catalog, schema, table); final VarCharVector catalogNameVector = new VarCharVector("catalog_name", rootAllocator); @@ -1140,7 +1158,7 @@ public void getStreamExportedKeys(final CommandGetExportedKeys command, final Ca String schema = command.hasDbSchema() ? command.getDbSchema() : null; String table = command.getTable(); - try (Connection connection = DriverManager.getConnection(DATABASE_URI); + try (Connection connection = DriverManager.getConnection(databaseUri); ResultSet keys = connection.getMetaData().getExportedKeys(catalog, schema, table); VectorSchemaRoot vectorSchemaRoot = createVectors(keys)) { listener.start(vectorSchemaRoot); @@ -1165,7 +1183,7 @@ public void getStreamImportedKeys(final CommandGetImportedKeys command, final Ca String schema = command.hasDbSchema() ? 
command.getDbSchema() : null; String table = command.getTable(); - try (Connection connection = DriverManager.getConnection(DATABASE_URI); + try (Connection connection = DriverManager.getConnection(databaseUri); ResultSet keys = connection.getMetaData().getImportedKeys(catalog, schema, table); VectorSchemaRoot vectorSchemaRoot = createVectors(keys)) { listener.start(vectorSchemaRoot); @@ -1193,7 +1211,7 @@ public void getStreamCrossReference(CommandGetCrossReference command, CallContex final String pkTable = command.getPkTable(); final String fkTable = command.getFkTable(); - try (Connection connection = DriverManager.getConnection(DATABASE_URI); + try (Connection connection = DriverManager.getConnection(databaseUri); ResultSet keys = connection.getMetaData() .getCrossReference(pkCatalog, pkSchema, pkTable, fkCatalog, fkSchema, fkTable); VectorSchemaRoot vectorSchemaRoot = createVectors(keys)) { @@ -1280,7 +1298,7 @@ public void getStreamStatement(final TicketStatementQuery ticketStatementQuery, } } - private FlightInfo getFlightInfoForSchema(final T request, final FlightDescriptor descriptor, + protected FlightInfo getFlightInfoForSchema(final T request, final FlightDescriptor descriptor, final Schema schema) { final Ticket ticket = new Ticket(pack(request).toByteArray()); // TODO Support multiple endpoints. diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlStatelessExample.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlStatelessExample.java new file mode 100644 index 0000000000000..c79c09c0967dc --- /dev/null +++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlStatelessExample.java @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.flight.sql.example; + +import static java.lang.String.format; +import static org.apache.arrow.adapter.jdbc.JdbcToArrow.sqlToArrowVectorIterator; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.jdbcToArrowSchema; +import static org.apache.arrow.flight.sql.impl.FlightSql.*; +import static org.slf4j.LoggerFactory.getLogger; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.StreamCorruptedException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; + +import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; +import org.apache.arrow.adapter.jdbc.JdbcParameterBinder; +import org.apache.arrow.flight.CallStatus; +import org.apache.arrow.flight.FlightDescriptor; +import org.apache.arrow.flight.FlightInfo; +import org.apache.arrow.flight.FlightStream; +import org.apache.arrow.flight.Location; +import org.apache.arrow.flight.PutResult; +import org.apache.arrow.flight.sql.FlightSqlProducer; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.VectorLoader; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.VectorUnloader; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.ipc.SeekableReadChannel; +import org.apache.arrow.vector.ipc.message.ArrowBlock; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; +import org.slf4j.Logger; + +import com.google.protobuf.ByteString; + +/** + * Example {@link FlightSqlProducer} implementation showing an Apache Derby backed Flight SQL server that generally + * supports all current features of Flight SQL while keeping no per-statement state on the server: the prepared + * statement handle itself carries the query and any bound parameters.
+ */ +public class FlightSqlStatelessExample extends FlightSqlExample { + private static final Logger LOGGER = getLogger(FlightSqlStatelessExample.class); + public static final String DB_NAME = "derbyStatelessDB"; + + + public FlightSqlStatelessExample(final Location location, final String dbName) { + super(location, dbName); + } + + @Override + public Runnable acceptPutPreparedStatementQuery(CommandPreparedStatementQuery command, CallContext context, + FlightStream flightStream, StreamListener ackStream) { + + return () -> { + final String query = new String(command.getPreparedStatementHandle().toStringUtf8()); + try (Connection connection = dataSource.getConnection(); + PreparedStatement preparedStatement = createPreparedStatement(connection, query)) { + while (flightStream.next()) { + final VectorSchemaRoot root = flightStream.getRoot(); + final JdbcParameterBinder binder = JdbcParameterBinder.builder(preparedStatement, root).bindAll().build(); + while (binder.next()) { + // Do not execute() - will be done in a getStream call + } + + final ByteArrayOutputStream parametersStream = new ByteArrayOutputStream(); + try (ArrowFileWriter writer = new ArrowFileWriter(root, null, Channels.newChannel(parametersStream)) + ) { + writer.start(); + writer.writeBatch(); + } + + if (parametersStream.size() > 0) { + final DoPutPreparedStatementResultPOJO doPutPreparedStatementResultPOJO = + new DoPutPreparedStatementResultPOJO(query, parametersStream.toByteArray()); + + final byte[] doPutPreparedStatementResultPOJOArr = serializePOJO(doPutPreparedStatementResultPOJO); + final DoPutPreparedStatementResult doPutPreparedStatementResult = + DoPutPreparedStatementResult.newBuilder() + .setPreparedStatementHandle( + ByteString.copyFrom(ByteBuffer.wrap(doPutPreparedStatementResultPOJOArr))) + .build(); + + try (final ArrowBuf buffer = rootAllocator.buffer(doPutPreparedStatementResult.getSerializedSize())) { + buffer.writeBytes(doPutPreparedStatementResult.toByteArray()); + ackStream.onNext(PutResult.metadata(buffer)); + } + } + } + + } catch (SQLException | IOException e) { + ackStream.onError(CallStatus.INTERNAL + .withDescription("Failed to bind parameters: " + e.getMessage()) + .withCause(e) + .toRuntimeException()); + return; + } + + ackStream.onCompleted(); + }; + } + + @Override + public void getStreamPreparedStatement(final CommandPreparedStatementQuery command, final CallContext context, + final ServerStreamListener listener) { + final byte[] handle = command.getPreparedStatementHandle().toByteArray(); + try { + // Case where there are parameters + try { + final DoPutPreparedStatementResultPOJO doPutPreparedStatementResultPOJO = + deserializePOJO(handle); + final String query = doPutPreparedStatementResultPOJO.getQuery(); + + try (Connection connection = dataSource.getConnection(); + PreparedStatement statement = createPreparedStatement(connection, query); + ArrowFileReader reader = new ArrowFileReader(new SeekableReadChannel( + new ByteArrayReadableSeekableByteChannel( + doPutPreparedStatementResultPOJO.getParameters())), rootAllocator)) { + + for (ArrowBlock arrowBlock : reader.getRecordBlocks()) { + reader.loadRecordBatch(arrowBlock); + VectorSchemaRoot vectorSchemaRootRecover = reader.getVectorSchemaRoot(); + JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, vectorSchemaRootRecover) + .bindAll().build(); + + while (binder.next()) { + executeQuery(statement, listener); + } + } + } + } catch (StreamCorruptedException e) { + // Case where there are no parameters + final String query 
= new String(command.getPreparedStatementHandle().toStringUtf8()); + try (Connection connection = dataSource.getConnection(); + PreparedStatement preparedStatement = createPreparedStatement(connection, query)) { + executeQuery(preparedStatement, listener); + } + } + } catch (final SQLException | IOException | ClassNotFoundException e) { + LOGGER.error(format("Failed to getStreamPreparedStatement: <%s>.", e.getMessage()), e); + listener.error(CallStatus.INTERNAL.withDescription("Failed to prepare statement: " + e).toRuntimeException()); + } finally { + listener.completed(); + } + } + + private void executeQuery(PreparedStatement statement, + final ServerStreamListener listener) throws IOException, SQLException { + try (final ResultSet resultSet = statement.executeQuery()) { + final Schema schema = jdbcToArrowSchema(resultSet.getMetaData(), DEFAULT_CALENDAR); + try (final VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schema, rootAllocator)) { + final VectorLoader loader = new VectorLoader(vectorSchemaRoot); + listener.start(vectorSchemaRoot); + + final ArrowVectorIterator iterator = sqlToArrowVectorIterator(resultSet, rootAllocator); + while (iterator.hasNext()) { + final VectorSchemaRoot batch = iterator.next(); + if (batch.getRowCount() == 0) { + break; + } + final VectorUnloader unloader = new VectorUnloader(batch); + loader.load(unloader.getRecordBatch()); + listener.putNext(); + vectorSchemaRoot.clear(); + } + listener.putNext(); + } + } + } + + @Override + public FlightInfo getFlightInfoPreparedStatement(final CommandPreparedStatementQuery command, + final CallContext context, + final FlightDescriptor descriptor) { + final byte[] handle = command.getPreparedStatementHandle().toByteArray(); + try { + String query; + try { + query = deserializePOJO(handle).getQuery(); + } catch (StreamCorruptedException e) { + query = new String(command.getPreparedStatementHandle().toStringUtf8()); + } + try (Connection connection = dataSource.getConnection(); + PreparedStatement statement = createPreparedStatement(connection, query)) { + ResultSetMetaData metaData = statement.getMetaData(); + return getFlightInfoForSchema(command, descriptor, + jdbcToArrowSchema(metaData, DEFAULT_CALENDAR)); + } + } catch (final SQLException | IOException | ClassNotFoundException e) { + LOGGER.error(format("There was a problem executing the prepared statement: <%s>.", e.getMessage()), e); + throw CallStatus.INTERNAL.withCause(e).toRuntimeException(); + } + } + + private DoPutPreparedStatementResultPOJO deserializePOJO(byte[] handle) throws IOException, ClassNotFoundException { + try (ByteArrayInputStream bis = new ByteArrayInputStream(handle); + ObjectInputStream ois = new ObjectInputStream(bis)) { + return (DoPutPreparedStatementResultPOJO) ois.readObject(); + } + } + + private byte[] serializePOJO(DoPutPreparedStatementResultPOJO doPutPreparedStatementResultPOJO) throws IOException { + try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream oos = new ObjectOutputStream(bos)) { + oos.writeObject(doPutPreparedStatementResultPOJO); + return bos.toByteArray(); + } + } + + private PreparedStatement createPreparedStatement(Connection connection, String query) throws SQLException { + return connection.prepareStatement(query, ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY); + } +} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java 
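The stateless example's core trick, spread across the two overrides above, is to serialize the bound-parameter VectorSchemaRoot into the prepared-statement handle with ArrowFileWriter and replay it later with ArrowFileReader, so no server-side statement cache is needed. A self-contained sketch of that in-memory IPC round trip (the sample vector and data are invented for illustration):

    import java.io.ByteArrayOutputStream;
    import java.nio.channels.Channels;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;
    import org.apache.arrow.vector.VectorSchemaRoot;
    import org.apache.arrow.vector.ipc.ArrowFileReader;
    import org.apache.arrow.vector.ipc.ArrowFileWriter;
    import org.apache.arrow.vector.ipc.SeekableReadChannel;
    import org.apache.arrow.vector.ipc.message.ArrowBlock;
    import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;

    class ParameterRoundTripSketch {
      public static void main(String[] args) throws Exception {
        try (RootAllocator allocator = new RootAllocator()) {
          IntVector ids = new IntVector("id", allocator);
          ids.allocateNew(1);
          ids.setSafe(0, 1);
          ids.setValueCount(1);
          try (VectorSchemaRoot root = VectorSchemaRoot.of(ids)) { // closing root releases ids
            root.setRowCount(1);
            // Serialize the batch to bytes, as acceptPutPreparedStatementQuery does.
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            try (ArrowFileWriter writer = new ArrowFileWriter(root, null, Channels.newChannel(out))) {
              writer.start();
              writer.writeBatch();
            }
            // Recover the parameters later, as getStreamPreparedStatement does.
            try (ArrowFileReader reader = new ArrowFileReader(
                new SeekableReadChannel(
                    new ByteArrayReadableSeekableByteChannel(out.toByteArray())), allocator)) {
              for (ArrowBlock block : reader.getRecordBlocks()) {
                reader.loadRecordBatch(block);
                System.out.println("recovered rows: " + reader.getVectorSchemaRoot().getRowCount());
              }
            }
          }
        }
      }
    }

Wrapping the bytes in the serializable DoPutPreparedStatementResultPOJO is what lets getStreamPreparedStatement() tell a parameter-carrying handle apart from a plain query handle, via the StreamCorruptedException fallback above.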
index a39736e939f0b..ffffdd62ac950 100644 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java +++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java @@ -87,63 +87,72 @@ public class TestFlightSql { Field.nullable("FOREIGNID", MinorType.INT.getType()))); private static final List> EXPECTED_RESULTS_FOR_STAR_SELECT_QUERY = ImmutableList.of( asList("1", "one", "1", "1"), asList("2", "zero", "0", "1"), asList("3", "negative one", "-1", "1")); - private static final List> EXPECTED_RESULTS_FOR_PARAMETER_BINDING = ImmutableList.of( + protected static final List> EXPECTED_RESULTS_FOR_PARAMETER_BINDING = ImmutableList.of( asList("1", "one", "1", "1")); private static final Map GET_SQL_INFO_EXPECTED_RESULTS_MAP = new LinkedHashMap<>(); - private static final String LOCALHOST = "localhost"; - private static BufferAllocator allocator; - private static FlightServer server; - private static FlightSqlClient sqlClient; + protected static final String LOCALHOST = "localhost"; + protected static BufferAllocator allocator; + protected static FlightServer server; + protected static FlightSqlClient sqlClient; @BeforeAll public static void setUp() throws Exception { + setUpClientServer(); + setUpExpectedResultsMap(); + } + + private static void setUpClientServer() throws Exception { allocator = new RootAllocator(Integer.MAX_VALUE); final Location serverLocation = Location.forGrpcInsecure(LOCALHOST, 0); - server = FlightServer.builder(allocator, serverLocation, new FlightSqlExample(serverLocation)) - .build() - .start(); + server = FlightServer.builder(allocator, serverLocation, + new FlightSqlExample(serverLocation, FlightSqlExample.DB_NAME)) + .build() + .start(); final Location clientLocation = Location.forGrpcInsecure(LOCALHOST, server.getPort()); sqlClient = new FlightSqlClient(FlightClient.builder(allocator, clientLocation).build()); + } + protected static void setUpExpectedResultsMap() { GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME_VALUE), "Apache Derby"); + .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME_VALUE), "Apache Derby"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_VERSION_VALUE), "10.14.2.0 - (1828579)"); + .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_VERSION_VALUE), "10.14.2.0 - (1828579)"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION_VALUE), "10.14.2.0 - (1828579)"); + .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION_VALUE), "10.14.2.0 - (1828579)"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY_VALUE), "false"); + .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY_VALUE), "false"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.SQL_ALL_TABLES_ARE_SELECTABLE_VALUE), "true"); + .put(Integer.toString(FlightSql.SqlInfo.SQL_ALL_TABLES_ARE_SELECTABLE_VALUE), "true"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put( - Integer.toString(FlightSql.SqlInfo.SQL_NULL_ORDERING_VALUE), - Integer.toString(FlightSql.SqlNullOrdering.SQL_NULLS_SORTED_AT_END_VALUE)); + .put( + Integer.toString(FlightSql.SqlInfo.SQL_NULL_ORDERING_VALUE), + Integer.toString(FlightSql.SqlNullOrdering.SQL_NULLS_SORTED_AT_END_VALUE)); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_CATALOG_VALUE), "false"); + 
.put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_CATALOG_VALUE), "false"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_SCHEMA_VALUE), "true"); + .put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_SCHEMA_VALUE), "true"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_TABLE_VALUE), "true"); + .put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_TABLE_VALUE), "true"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put( - Integer.toString(FlightSql.SqlInfo.SQL_IDENTIFIER_CASE_VALUE), - Integer.toString(SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_UPPERCASE_VALUE)); + .put( + Integer.toString(FlightSql.SqlInfo.SQL_IDENTIFIER_CASE_VALUE), + Integer.toString(SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_UPPERCASE_VALUE)); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.SQL_IDENTIFIER_QUOTE_CHAR_VALUE), "\""); + .put(Integer.toString(FlightSql.SqlInfo.SQL_IDENTIFIER_QUOTE_CHAR_VALUE), "\""); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put( - Integer.toString(FlightSql.SqlInfo.SQL_QUOTED_IDENTIFIER_CASE_VALUE), - Integer.toString(SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_CASE_INSENSITIVE_VALUE)); + .put( + Integer.toString(FlightSql.SqlInfo.SQL_QUOTED_IDENTIFIER_CASE_VALUE), + Integer.toString(SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_CASE_INSENSITIVE_VALUE)); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.SQL_MAX_COLUMNS_IN_TABLE_VALUE), "42"); + .put(Integer.toString(FlightSql.SqlInfo.SQL_MAX_COLUMNS_IN_TABLE_VALUE), "42"); } @AfterAll public static void tearDown() throws Exception { close(sqlClient, server, allocator); + FlightSqlExample.removeDerbyDatabaseIfExists(FlightSqlExample.DB_NAME); } private static List> getNonConformingResultsForGetSqlInfo(final List> results) { diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStateless.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStateless.java new file mode 100644 index 0000000000000..09c7b2ef87f45 --- /dev/null +++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStateless.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.flight.sql.test; + +import static org.apache.arrow.flight.sql.util.FlightStreamUtils.getResults; +import static org.apache.arrow.util.AutoCloseables.close; +import static org.hamcrest.CoreMatchers.*; + +import org.apache.arrow.flight.FlightClient; +import org.apache.arrow.flight.FlightEndpoint; +import org.apache.arrow.flight.FlightInfo; +import org.apache.arrow.flight.FlightServer; +import org.apache.arrow.flight.FlightStream; +import org.apache.arrow.flight.Location; +import org.apache.arrow.flight.sql.FlightSqlClient; +import org.apache.arrow.flight.sql.FlightSqlClient.PreparedStatement; +import org.apache.arrow.flight.sql.example.FlightSqlStatelessExample; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; +import org.hamcrest.MatcherAssert; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +/** + * Test direct usage of Flight SQL workflows. + */ +public class TestFlightSqlStateless extends TestFlightSql { + + @BeforeAll + public static void setUp() throws Exception { + setUpClientServer(); + setUpExpectedResultsMap(); + } + + @AfterAll + public static void tearDown() throws Exception { + close(sqlClient, server, allocator); + FlightSqlStatelessExample.removeDerbyDatabaseIfExists(FlightSqlStatelessExample.DB_NAME); + } + + private static void setUpClientServer() throws Exception { + allocator = new RootAllocator(Integer.MAX_VALUE); + + final Location serverLocation = Location.forGrpcInsecure(LOCALHOST, 0); + server = FlightServer.builder(allocator, serverLocation, + new FlightSqlStatelessExample(serverLocation, FlightSqlStatelessExample.DB_NAME)) + .build() + .start(); + + final Location clientLocation = Location.forGrpcInsecure(LOCALHOST, server.getPort()); + sqlClient = new FlightSqlClient(FlightClient.builder(allocator, clientLocation).build()); + } + + @Override + @Test + public void testSimplePreparedStatementResultsWithParameterBinding() throws Exception { + try (PreparedStatement prepare = sqlClient.prepare("SELECT * FROM intTable WHERE id = ?")) { + final Schema parameterSchema = prepare.getParameterSchema(); + try (final VectorSchemaRoot insertRoot = VectorSchemaRoot.create(parameterSchema, allocator)) { + insertRoot.allocateNew(); + + final IntVector valueVector = (IntVector) insertRoot.getVector(0); + valueVector.setSafe(0, 1); + insertRoot.setRowCount(1); + + prepare.setParameters(insertRoot); + final FlightInfo flightInfo = prepare.execute(); + + for (FlightEndpoint endpoint: flightInfo.getEndpoints()) { + try (FlightStream stream = sqlClient.getStream(endpoint.getTicket())) { + Assertions.assertAll( + () -> MatcherAssert.assertThat(stream.getSchema(), is(SCHEMA_INT_TABLE)), + () -> MatcherAssert.assertThat(getResults(stream), is(EXPECTED_RESULTS_FOR_PARAMETER_BINDING)) + ); + } + } + } + } + } +} diff --git a/java/format/pom.xml b/java/format/pom.xml index e9eded79de660..4483047e20960 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -31,7 +31,6 @@ - @@ -42,6 +41,5 @@ - diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 00acb89f1d7cf..1c17023e5c8ad 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -22,13 +22,12 @@ jar Arrow Gandiva Java wrappers around the native Gandiva SQL expression compiler. 
+ - 1.8 - 1.8 - 3.25.1 true ../../../cpp/release-build + org.apache.arrow @@ -36,7 +35,7 @@ org.immutables - value + value-annotations org.apache.arrow @@ -51,7 +50,6 @@ com.google.protobuf protobuf-java - ${protobuf.version} com.google.guava @@ -62,6 +60,7 @@ slf4j-api + @@ -88,14 +87,6 @@ - - - - kr.motd.maven - os-maven-plugin - 1.7.1 - - @@ -105,7 +96,6 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 attach-sources @@ -118,7 +108,6 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 attach-javadocs @@ -131,7 +120,6 @@ org.apache.maven.plugins maven-gpg-plugin - 3.2.2 sign-artifacts @@ -146,5 +134,4 @@ - diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index 6589020d6ecb5..9f0cd7b1039dd 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -64,44 +64,19 @@ org.apache.maven.plugin-tools maven-plugin-annotations - 3.11.0 + ${maven.plugin.tools.version} provided - - - maven-clean-plugin - 3.3.2 - - - maven-plugin-plugin - 3.12.0 - - - maven-jar-plugin - 3.3.0 - - - maven-install-plugin - 3.1.1 - - - maven-deploy-plugin - 3.1.1 - - - maven-invoker-plugin - 3.1.0 - com.gradle - gradle-enterprise-maven-extension + develocity-maven-extension - + @@ -109,7 +84,7 @@ - + @@ -118,7 +93,6 @@ org.apache.maven.plugins maven-plugin-plugin - 3.12.0 true diff --git a/java/maven/pom.xml b/java/maven/pom.xml index f290ded2e2913..8a4043016e770 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -15,6 +15,13 @@ Note: Do not inherit from the Arrow parent POM as plugins can be referenced during the parent POM, introducing circular dependencies. --> + + org.apache + apache + 31 + + + org.apache.arrow.maven.plugins arrow-maven-plugins 17.0.0-SNAPSHOT @@ -27,25 +34,38 @@ true + + 1.8 + 1.8 + 3.12.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.4 + 3.2.2 + 3.6.3 + 3.5.0 - org.apache.maven.plugins - maven-project-info-reports-plugin - 3.5.0 + com.diffplug.spotless + spotless-maven-plugin + 2.30.0 - org.apache.maven.plugins - maven-site-plugin - 3.12.1 + pl.project13.maven + git-commit-id-plugin + 4.0.5 - com.diffplug.spotless - spotless-maven-plugin - 2.30.0 + org.cyclonedx + cyclonedx-maven-plugin + 2.7.11 @@ -119,11 +139,6 @@ **/logback.xml - true - - true - true - org.apache.arrow ${username} @@ -143,43 +158,17 @@ - - org.apache.maven.plugins - maven-resources-plugin - - UTF-8 - - org.apache.maven.plugins maven-compiler-plugin - UTF-8 - 1.8 - 1.8 2048m - false true maven-enforcer-plugin - - validate_java_and_maven_version - - enforce - - verify - false - - - - [3.3.0,4) - - - - avoid_bad_dependencies @@ -205,8 +194,6 @@ pl.project13.maven git-commit-id-plugin - 4.0.5 - dd.MM.yyyy '@' HH:mm:ss z false @@ -248,10 +235,9 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.0 ../dev/checkstyle/checkstyle.xml - ../dev/checkstyle/checkstyle.license + ../dev/license/asf-java.license ../dev/checkstyle/suppressions.xml true UTF-8 @@ -272,7 +258,7 @@ org.slf4j jcl-over-slf4j - 2.0.12 + 2.0.13 @@ -288,7 +274,6 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.11 @@ -298,28 +283,6 @@ - - - org.apache.maven.plugins - maven-assembly-plugin - - - src - - - - - - single - - package - - - - org.apache.maven.plugins maven-project-info-reports-plugin @@ -353,13 +316,34 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 + + + + apache-release + + + + org.apache.maven.plugins + 
maven-assembly-plugin + + + source-release-assembly + + + true + + + + + + + + diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index ca5bc603bd4dc..783a13a6fb0ad 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -33,7 +33,7 @@ org.immutables - value + value-annotations org.checkerframework @@ -61,9 +61,6 @@ jdk11+ [11,] - - !m2e.version - @@ -92,7 +89,6 @@ org.apache.maven.plugins maven-surefire-plugin - opens-tests @@ -101,12 +97,9 @@ test - - -Dfoo=bar - - - **/TestArrowBuf.java - + + + **/TestOpens.java @@ -129,9 +122,6 @@ org.apache.maven.plugins maven-compiler-plugin - 8 - 8 - UTF-8 -Xmaxerrs @@ -150,12 +140,6 @@ ${checker.framework.version} - - - org.immutables.value.internal.$processor$.$Processor - - org.checkerframework.checker.nullness.NullnessChecker - diff --git a/java/memory/memory-core/src/main/java/module-info.java b/java/memory/memory-core/src/main/java/module-info.java index 34ba34e80bc69..d8c22dd993dd9 100644 --- a/java/memory/memory-core/src/main/java/module-info.java +++ b/java/memory/memory-core/src/main/java/module-info.java @@ -23,6 +23,5 @@ exports org.apache.arrow.util; requires transitive jdk.unsupported; requires jsr305; - requires org.immutables.value; requires org.slf4j; } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java index 3ceda71cce0fe..fd534b189987c 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java @@ -119,16 +119,14 @@ public boolean isAllocationFailed() { @Override public String toString() { - return new StringBuilder() - .append("allocator[" + accountant.getName() + "]") - .append(" reservation: " + accountant.getInitReservation()) - .append(" limit: " + limit) - .append(" used: " + used) - .append(" requestedSize: " + requestedSize) - .append(" allocatedSize: " + allocatedSize) - .append(" localAllocationStatus: " + (allocationFailed ? "fail" : "success")) - .append("\n") - .toString(); + return "allocator[" + accountant.getName() + "]" + + " reservation: " + accountant.getInitReservation() + + " limit: " + limit + + " used: " + used + + " requestedSize: " + requestedSize + + " allocatedSize: " + allocatedSize + + " localAllocationStatus: " + (allocationFailed ? 
"fail" : "success") + + "\n"; } } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java index c672dc48d79ca..cc6cbf7e6f2c1 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java @@ -84,5 +84,5 @@ public interface AllocationReservation extends AutoCloseable { */ boolean isClosed(); - void close(); + @Override void close(); } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java index 189c800ba0fe5..89b8ffd322a9b 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java @@ -532,6 +532,8 @@ public String toVerboseString() { return sb.toString(); } + /* Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 */ + @SuppressWarnings("FormatStringAnnotation") private void hist(String noteFormat, Object... args) { if (historicalLog != null) { historicalLog.recordEvent(noteFormat, args); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java index 62d268a1f4493..c610066c982bd 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java @@ -58,6 +58,7 @@ boolean isOwningLedger() { return this == allocationManager.getOwningLedger(); } + @Override public BufferAllocator getKey() { return allocator; } @@ -419,7 +420,7 @@ public TransferResult transferOwnership(final ArrowBuf srcBuffer, final BufferAl /** * The outcome of a Transfer. */ - public class TransferResult implements OwnershipTransferResult { + public static class TransferResult implements OwnershipTransferResult { // Whether this transfer fit within the target allocator's capacity. 
final boolean allocationFit; diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java index 6b622e7192789..e7877d7a3e287 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java @@ -49,5 +49,5 @@ public interface BufferManager extends AutoCloseable { */ ArrowBuf getManagedBuffer(long size); - void close(); + @Override void close(); } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java index dac4a3fcff59a..bfbf1f212e69a 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java @@ -60,6 +60,7 @@ static String check() { } } + @SuppressWarnings("URLEqualsHashCode") private static Set scanClasspath() { // LinkedHashSet appropriate here because it preserves insertion order // during iteration diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java index 67156f89d13aa..f3132cb46a21c 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java @@ -21,7 +21,7 @@ /** * Child allocator class. Only slightly different from the {@see RootAllocator}, * in that these can't be created directly, but must be obtained from - * {@see BufferAllocator#newChildAllocator(AllocatorOwner, long, long, int)}. + * {@link BufferAllocator#newChildAllocator(String, AllocationListener, long, long)}. * *
* Child allocators can only be created by the root, or other children, so * this class is package private.
    diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java index 740233ef411ff..251028aff0e3c 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java @@ -255,7 +255,7 @@ void rehash() { } private void computeMaxSize() { - threshold = (int) ((long) (elementData.length) * LOAD_FACTOR / 10000); + threshold = (int) ((long) elementData.length * LOAD_FACTOR / 10000); } /** @@ -309,7 +309,6 @@ private void computeMaxSize() { elementData[index] = null; } } - return (V) result; } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java index 3530b819aadff..9e37c286ad831 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java @@ -44,4 +44,6 @@ public interface ReusableBuffer { * @param len the number of bytes of the new data */ void set(ArrowBuf srcBytes, long start, long len); + + void set(byte[] srcBytes, long start, long len); } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java index 7ba231b0c2095..5939e803fdcd6 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java @@ -44,20 +44,16 @@ public class DefaultRoundingPolicy implements RoundingPolicy { static { int defaultPageSize = Integer.getInteger("org.apache.memory.allocator.pageSize", 8192); - Throwable pageSizeFallbackCause = null; try { validateAndCalculatePageShifts(defaultPageSize); } catch (Throwable t) { - pageSizeFallbackCause = t; defaultPageSize = 8192; } int defaultMaxOrder = Integer.getInteger("org.apache.memory.allocator.maxOrder", 11); - Throwable maxOrderFallbackCause = null; try { validateAndCalculateChunkSize(defaultPageSize, defaultMaxOrder); } catch (Throwable t) { - maxOrderFallbackCause = t; defaultMaxOrder = 11; } DEFAULT_CHUNK_SIZE = validateAndCalculateChunkSize(defaultPageSize, defaultMaxOrder); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java index b41576847d6b7..5775dd794348b 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java @@ -27,7 +27,7 @@ * Pointer to a memory region within an {@link ArrowBuf}. * It will be used as the basis for calculating hash code within a vector, and equality determination. */ -public final class ArrowBufPointer { +public final class ArrowBufPointer implements Comparable { /** * The hash code when the arrow buffer is null. @@ -174,6 +174,7 @@ public int hashCode() { * a positive integer if this pointer is larger; * a negative integer if this pointer is smaller. 
*/ + @Override public int compareTo(ArrowBufPointer that) { if (this.buf == null || that.buf == null) { if (this.buf == null && that.buf == null) { diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java index 8040158fd090e..5b80816d48ff7 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java @@ -161,7 +161,7 @@ public static float toFloat(short b) { int bits = b & 0xffff; int s = bits & SIGN_MASK; int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; - int m = (bits) & SIGNIFICAND_MASK; + int m = bits & SIGNIFICAND_MASK; int outE = 0; int outM = 0; if (e == 0) { // Denormal or 0 @@ -209,7 +209,7 @@ public static short toFloat16(float f) { int bits = Float.floatToRawIntBits(f); int s = (bits >>> FP32_SIGN_SHIFT); int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK; - int m = (bits) & FP32_SIGNIFICAND_MASK; + int m = bits & FP32_SIGNIFICAND_MASK; int outE = 0; int outM = 0; if (e == 0xff) { // Infinite or NaN diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java index 21f063c939ec8..910cc1c21d72d 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java @@ -17,8 +17,9 @@ package org.apache.arrow.memory.util; +import java.util.ArrayDeque; import java.util.Arrays; -import java.util.LinkedList; +import java.util.Deque; import org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; @@ -30,7 +31,7 @@ */ public class HistoricalLog { - private final LinkedList history = new LinkedList<>(); + private final Deque history = new ArrayDeque<>(); private final String idString; // the formatted id string private final int limit; // the limit on the number of events kept private @Nullable Event firstEvent; // the first stack trace recorded @@ -44,6 +45,8 @@ public class HistoricalLog { * associated with the object instance is best. * @param args for the format string, or nothing if none are required */ + @SuppressWarnings("FormatStringAnnotation") + /* Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 */ public HistoricalLog(final String idStringFormat, Object... args) { this(Integer.MAX_VALUE, idStringFormat, args); } @@ -66,7 +69,9 @@ public HistoricalLog(final String idStringFormat, Object... args) { * associated with the object instance is best. * @param args for the format string, or nothing if none are required */ + @SuppressWarnings("AnnotateFormatMethod") public HistoricalLog(final int limit, final String idStringFormat, Object... args) { + // Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 this.limit = limit; this.idString = String.format(idStringFormat, args); this.firstEvent = null; @@ -80,7 +85,9 @@ public HistoricalLog(final int limit, final String idStringFormat, Object... arg * @param noteFormat {@link String#format} format string that describes the event * @param args for the format string, or nothing if none are required */ + @SuppressWarnings("AnnotateFormatMethod") public synchronized void recordEvent(final String noteFormat, Object... 
args) { + // Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 final String note = String.format(noteFormat, args); final Event event = new Event(note); if (firstEvent == null) { diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java index 2f74a985a3ff4..727e3531ee83f 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java @@ -58,6 +58,7 @@ public class MemoryUtil { public static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; // Java 1.8, 9, 11, 17, 21 becomes 1, 9, 11, 17, and 21. + @SuppressWarnings("StringSplitter") private static final int majorVersion = Integer.parseInt(System.getProperty("java.specification.version").split("\\D+")[0]); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java index 5de98d23bb83b..9c5d0b9086113 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java @@ -162,7 +162,7 @@ public boolean equals(@Nullable Object o) { if (this == o) { return true; } - if (o == null || getClass() != o.getClass()) { + if (!(o instanceof MurmurHasher)) { return false; } MurmurHasher that = (MurmurHasher) o; diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java index 3bf3c2a828338..670129d3fb2a2 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java @@ -53,6 +53,7 @@ protected SimpleHasher() { * @param length length of the memory region. * @return the hash code. */ + @Override public int hashCode(long address, long length) { int hashValue = 0; int index = 0; diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java index b4385b72a38cf..f01d152f84bf3 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java @@ -141,7 +141,7 @@ public void testSetBytesBigEndian() { } @Test - /** + /* * Test that allocation history is not recorded even though * assertions are enabled in tests (GH-34338). 
*/ diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java index 535d5c15e8916..1f9e65831b438 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java @@ -374,7 +374,7 @@ public void testCustomizedAllocationManager() { assertEquals(1, arrowBuf1.getInt(0)); try { - final ArrowBuf arrowBuf2 = allocator.buffer(1); + allocator.buffer(1); fail("allocated memory beyond max allowed"); } catch (OutOfMemoryException e) { // expected @@ -1077,7 +1077,7 @@ public void testMemoryLeakWithReservation() throws Exception { "child2", 1024, MAX_ALLOCATION); rootAllocator.verify(); - ArrowBuf buff = childAllocator2.buffer(256); + childAllocator2.buffer(256); Exception exception = assertThrows(IllegalStateException.class, () -> { childAllocator2.close(); diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java index 0cabc4a0571f2..234a6447ddb62 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java @@ -147,7 +147,7 @@ public void testLargeMap() throws Exception { assertTrue(hashMap.isEmpty()); } - private class StringWithKey implements ValueWithKeyIncluded<String> { + private static class StringWithKey implements ValueWithKeyIncluded<String> { private String myValue; private String myKey; diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java index a1d5624a7e8c0..04e588ed16fc8 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java @@ -55,16 +55,16 @@ public void testArrowBufPointersEqual() { try (ArrowBuf buf1 = allocator.buffer(BUFFER_LENGTH); ArrowBuf buf2 = allocator.buffer(BUFFER_LENGTH)) { for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - buf1.setInt(i * 4, i * 1234); - buf2.setInt(i * 4, i * 1234); + buf1.setInt(i * 4L, i * 1234); + buf2.setInt(i * 4L, i * 1234); } ArrowBufPointer ptr1 = new ArrowBufPointer(null, 0, 100); ArrowBufPointer ptr2 = new ArrowBufPointer(null, 100, 5032); assertTrue(ptr1.equals(ptr2)); for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - ptr1.set(buf1, i * 4, 4); - ptr2.set(buf2, i * 4, 4); + ptr1.set(buf1, i * 4L, 4); + ptr2.set(buf2, i * 4L, 4); assertTrue(ptr1.equals(ptr2)); } } @@ -76,8 +76,8 @@ public void testArrowBufPointersHashCode() { try (ArrowBuf buf1 = allocator.buffer(vectorLength * 4); ArrowBuf buf2 = allocator.buffer(vectorLength * 4)) { for (int i = 0; i < vectorLength; i++) { - buf1.setInt(i * 4, i); - buf2.setInt(i * 4, i); + buf1.setInt(i * 4L, i); + buf2.setInt(i * 4L, i); } CounterHasher hasher1 = new CounterHasher(); @@ -90,8 +90,8 @@ assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer2.hashCode()); for (int i = 0; i < vectorLength; i++) { - pointer1.set(buf1, i * 4, 4); - pointer2.set(buf2, i * 4, 4); + pointer1.set(buf1, i * 4L, 4); + pointer2.set(buf2, i * 4L, 4);
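// pointers set to byte-identical regions must keep producing equal hash codes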
assertEquals(pointer1.hashCode(), pointer2.hashCode()); @@ -188,7 +188,7 @@ public void testArrowBufPointersComparison() { * Hasher with a counter that increments each time a hash code is calculated. * This is to validate that the hash code in {@link ArrowBufPointer} is reused. */ - class CounterHasher implements ArrowBufHasher { + static class CounterHasher implements ArrowBufHasher { protected int counter = 0; @@ -211,7 +211,7 @@ public int hashCode() { @Override public boolean equals(Object o) { - return o != null && this.getClass() == o.getClass(); + return o instanceof CounterHasher; } } } diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java index 04a715962dfe9..7a44a5f2d72fd 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java @@ -19,6 +19,8 @@ import static org.junit.Assert.assertEquals; +import java.nio.charset.StandardCharsets; + import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -121,9 +123,9 @@ public void testStringCompare() { String rightStr = rightStrings[i]; ArrowBuf left = allocator.buffer(SIZE); - left.setBytes(0, leftStr.getBytes()); + left.setBytes(0, leftStr.getBytes(StandardCharsets.UTF_8)); ArrowBuf right = allocator.buffer(SIZE); - right.setBytes(0, rightStr.getBytes()); + right.setBytes(0, rightStr.getBytes(StandardCharsets.UTF_8)); assertEquals(leftStr.compareTo(rightStr) < 0 ? -1 : 1, ByteFunctionHelpers.compare(left, 0, leftStr.length(), right, 0, rightStr.length())); diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java index 3da0602bdfd9c..cc5ce49e54828 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java @@ -66,8 +66,8 @@ public void testHasher() { ArrowBuf buf2 = allocator.buffer(BUFFER_LENGTH)) { // prepare data for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - buf1.setFloat(i * 4, i / 10.0f); - buf2.setFloat(i * 4, i / 10.0f); + buf1.setFloat(i * 4L, i / 10.0f); + buf2.setFloat(i * 4L, i / 10.0f); } verifyHashCodesEqual(buf1, 0, 100, buf2, 0, 100); @@ -95,7 +95,7 @@ public void testHasherNegative() { try (ArrowBuf buf = allocator.buffer(BUFFER_LENGTH)) { // prepare data for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - buf.setFloat(i * 4, i / 10.0f); + buf.setFloat(i * 4L, i / 10.0f); } assertThrows(IllegalArgumentException.class, () -> { @@ -120,13 +120,13 @@ public void testHasherLessThanInt() { buf2.writeBytes("bar2".getBytes(StandardCharsets.UTF_8)); for (int i = 1; i <= 4; i ++) { - verifyHashCodeNotEqual(buf1, 0, i, buf2, 0, i); + verifyHashCodeNotEqual(buf1, i, buf2, i); } } } - private void verifyHashCodeNotEqual(ArrowBuf buf1, int offset1, int length1, - ArrowBuf buf2, int offset2, int length2) { + private void verifyHashCodeNotEqual(ArrowBuf buf1, int length1, + ArrowBuf buf2, int length2) { int hashCode1 = hasher.hashCode(buf1, 0, length1); int hashCode2 = hasher.hashCode(buf2, 0, length2); assertNotEquals(hashCode1, hashCode2); diff --git 
a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java index 466444c7d53e8..ae1f30a868406 100644 --- a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java +++ b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java @@ -413,7 +413,7 @@ protected int _getUnsignedMedium(int index) { protected int _getUnsignedMediumLE(int index) { this.chk(index, 3); long addr = this.addr(index); - return PlatformDependent.getByte(addr) & 255 | + return (PlatformDependent.getByte(addr) & 255) | (Short.reverseBytes(PlatformDependent.getShort(addr + 1L)) & '\uffff') << 8; } diff --git a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java index e51c6c3d48882..ab0cd0c9e6e50 100644 --- a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java +++ b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java @@ -36,19 +36,12 @@ public class UnsafeDirectLittleEndian extends WrappedByteBuf { private final AbstractByteBuf wrapped; private final long memoryAddress; - UnsafeDirectLittleEndian(DuplicatedByteBuf buf) { - this(buf, true); - } - - UnsafeDirectLittleEndian(LargeBuffer buf) { - this(buf, true); - } - - UnsafeDirectLittleEndian(PooledUnsafeDirectByteBuf buf) { - this(buf, true); - } - - private UnsafeDirectLittleEndian(AbstractByteBuf buf, boolean fake) { + /** + * Constructs a new instance. + * + * @param buf The buffer to wrap + */ + public UnsafeDirectLittleEndian(AbstractByteBuf buf) { super(buf); this.wrapped = buf; diff --git a/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java b/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java index 043c2c1605a63..4717e48f27bef 100644 --- a/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java +++ b/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java @@ -23,7 +23,6 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import org.junit.Test; @@ -34,9 +33,9 @@ import io.netty.buffer.UnsafeDirectLittleEndian; public class TestUnsafeDirectLittleEndian { - private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; @Test + @SuppressWarnings("CatchAndPrintStackTrace") public void testPrimitiveGetSet() { ByteBuf byteBuf = Unpooled.directBuffer(64); UnsafeDirectLittleEndian unsafeDirect = new UnsafeDirectLittleEndian(new LargeBuffer(byteBuf)); diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index bb4ccd6c26f2a..d815276b09e50 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -53,7 +53,7 @@ org.immutables - value + value-annotations diff --git a/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java b/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java index 58354d0c2eebd..1e4e06df7e9ac 100644 --- 
a/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java +++ b/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java @@ -68,11 +68,9 @@ public ArrowBuf empty() { /** * The cut-off value for switching allocation strategies. */ - private final int allocationCutOffValue; NettyAllocationManager(BufferAllocator accountingAllocator, long requestedSize, int allocationCutOffValue) { super(accountingAllocator); - this.allocationCutOffValue = allocationCutOffValue; if (requestedSize > allocationCutOffValue) { this.memoryChunk = null; @@ -92,7 +90,7 @@ public ArrowBuf empty() { /** * Get the underlying memory chunk managed by this AllocationManager. * @return the underlying memory chunk if the request size is not greater than the - * {@link NettyAllocationManager#allocationCutOffValue}, or null otherwise. + * cutoff value provided in the constructor, or null otherwise. * * @deprecated this method will be removed in a future release. */ diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java index a782523cbc6d6..0c99062021f39 100644 --- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java +++ b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java @@ -36,15 +36,15 @@ public void testNativeEndian() { final ByteBuf b = NettyArrowBuf.unwrapBuffer(a.buffer(4)); b.setInt(0, 35); if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) { - assertEquals(b.getByte(0), 35); - assertEquals(b.getByte(1), 0); - assertEquals(b.getByte(2), 0); - assertEquals(b.getByte(3), 0); + assertEquals(35, b.getByte(0)); + assertEquals(0, b.getByte(1)); + assertEquals(0, b.getByte(2)); + assertEquals(0, b.getByte(3)); } else { - assertEquals(b.getByte(0), 0); - assertEquals(b.getByte(1), 0); - assertEquals(b.getByte(2), 0); - assertEquals(b.getByte(3), 35); + assertEquals(0, b.getByte(0)); + assertEquals(0, b.getByte(1)); + assertEquals(0, b.getByte(2)); + assertEquals(35, b.getByte(3)); } b.release(); a.close(); diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java index a6da36bb35aa7..792ae53a9404d 100644 --- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java +++ b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java @@ -39,6 +39,7 @@ public class TestNettyAllocator { @Test + @SuppressWarnings("SynchronizeOnNonFinalField") public void testMemoryUsage() { ListAppender<ILoggingEvent> memoryLogsAppender = new ListAppender<>(); memoryLogsAppender.list = Collections.synchronizedList(memoryLogsAppender.list); diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml index 502ac2cc8051a..f1aa8fde1faa1 100644 --- a/java/memory/memory-unsafe/pom.xml +++ b/java/memory/memory-unsafe/pom.xml @@ -28,7 +28,7 @@ org.immutables - value + value-annotations diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 1e99a29265724..07ca8d1e61d48 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -22,9 +22,7 @@ JMH Performance benchmarks for other Arrow libraries.
- UTF-8 1.37 - 1.8 benchmarks true .* @@ -42,104 +40,59 @@ org.openjdk.jmh jmh-core ${jmh.version} - test - - - org.openjdk.jmh - jmh-generator-annprocess - ${jmh.version} - provided org.apache.arrow arrow-vector ${arrow.vector.classifier} - test org.apache.arrow arrow-memory-core - test org.apache.arrow arrow-memory-netty - test + runtime org.apache.avro avro ${dep.avro.version} - test org.apache.arrow arrow-avro - test com.h2database h2 2.2.224 - test + runtime org.apache.arrow arrow-jdbc - test org.apache.arrow arrow-algorithm - test - - - - maven-clean-plugin - 3.3.2 - - - maven-deploy-plugin - 3.1.1 - - - maven-install-plugin - 3.1.1 - - - maven-jar-plugin - 3.3.0 - - - maven-javadoc-plugin - 3.6.3 - - - maven-resources-plugin - 3.3.1 - - - maven-source-plugin - 2.2.1 - - - maven-surefire-plugin - 3.2.5 - - - org.apache.maven.plugins maven-compiler-plugin - - ${javac.target} - ${javac.target} - ${javac.target} + + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + @@ -153,6 +106,7 @@ package ${uberjar.name} + false org.openjdk.jmh.Main @@ -175,7 +129,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 ${skip.perf.benchmarks} test @@ -212,5 +165,4 @@ - diff --git a/java/performance/src/test/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java similarity index 99% rename from java/performance/src/test/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java index fd3940b4c872c..f6dab83b7cd0c 100644 --- a/java/performance/src/test/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java @@ -54,6 +54,7 @@ * Benchmarks for Jdbc adapter. */ public class JdbcAdapterBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VALUE_COUNT = 3000; @@ -355,5 +356,6 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java index 1c3af77e73a05..c9fc5cc4bef9c 100644 --- a/java/performance/src/test/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java @@ -43,6 +43,7 @@ * Benchmarks for {@link ParallelSearcher}. 
*/ public class ParallelSearcherBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024 * 1024; @@ -112,4 +113,5 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/memory/AllocatorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/AllocatorBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/memory/AllocatorBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/memory/AllocatorBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/memory/ArrowBufBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/ArrowBufBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/memory/ArrowBufBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/memory/ArrowBufBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java similarity index 98% rename from java/performance/src/test/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java index 4d0dfcb5da80d..f1dc2d79eff83 100644 --- a/java/performance/src/test/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java @@ -48,7 +48,7 @@ public class ByteFunctionHelpersBenchmarks { */ @State(Scope.Benchmark) public static class ArrowEqualState { - + // checkstyle:off: MissingJavadocMethod private static final int BUFFER_CAPACITY = 7; private BufferAllocator allocator; @@ -135,4 +135,5 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java similarity index 98% rename from java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java index 5f6e5ca28fbab..e29b889c6e7a8 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java +++ 
b/java/performance/src/main/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java @@ -41,6 +41,7 @@ * Benchmarks for {@link BitVectorHelper}. */ public class BitVectorHelperBenchmarks { + // checkstyle:off: MissingJavadocMethod /** * State object for general benchmarks. @@ -226,4 +227,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/Float8Benchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/Float8Benchmarks.java index 874e0d9f82ee7..36a633e5e1b6e 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/Float8Benchmarks.java @@ -40,6 +40,7 @@ */ @State(Scope.Benchmark) public class Float8Benchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024; @@ -119,4 +120,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/FloatingPointBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/FloatingPointBenchmarks.java similarity index 98% rename from java/performance/src/test/java/org/apache/arrow/vector/FloatingPointBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/FloatingPointBenchmarks.java index 079672e9f2a98..2938591737f06 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/FloatingPointBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/FloatingPointBenchmarks.java @@ -41,6 +41,7 @@ */ @State(Scope.Benchmark) public class FloatingPointBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024; @@ -130,5 +131,6 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/IntBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/IntBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/IntBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/IntBenchmarks.java index 036768d445e55..99674058970a6 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/IntBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/IntBenchmarks.java @@ -41,6 +41,7 @@ */ @State(Scope.Benchmark) public class IntBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024; @@ -107,4 +108,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git 
a/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VarCharBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/VarCharBenchmarks.java index 1ab4b7bc20dad..a7ce4e04fee87 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VarCharBenchmarks.java @@ -39,6 +39,7 @@ */ @State(Scope.Benchmark) public class VarCharBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024; @@ -99,4 +100,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java index 7eee981f13327..62c54606e6da6 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java @@ -41,6 +41,7 @@ */ @State(Scope.Benchmark) public class VariableWidthVectorBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_CAPACITY = 16 * 1024; @@ -127,4 +128,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/VectorLoaderBenchmark.java b/java/performance/src/main/java/org/apache/arrow/vector/VectorLoaderBenchmark.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/VectorLoaderBenchmark.java rename to java/performance/src/main/java/org/apache/arrow/vector/VectorLoaderBenchmark.java index 416d126419e56..e8e8c0cfbc1f3 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/VectorLoaderBenchmark.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VectorLoaderBenchmark.java @@ -40,6 +40,7 @@ * Benchmarks for {@link VectorLoader}. 
*/ public class VectorLoaderBenchmark { + // checkstyle:off: MissingJavadocMethod private static final int ALLOCATOR_CAPACITY = 1024 * 1024; @@ -114,4 +115,5 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java b/java/performance/src/main/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java rename to java/performance/src/main/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java index d125172450004..b464f888fa85f 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java @@ -41,6 +41,7 @@ */ @State(Scope.Benchmark) public class VectorUnloaderBenchmark { + // checkstyle:off: MissingJavadocMethod private static final int ALLOCATOR_CAPACITY = 1024 * 1024; @@ -106,4 +107,5 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java b/java/performance/src/main/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java rename to java/performance/src/main/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java index 7a2537cbb8820..18efff11db9ff 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java @@ -41,6 +41,7 @@ * Benchmarks for {@link WriteChannel}. */ public class WriteChannelBenchmark { + // checkstyle:off: MissingJavadocMethod /** * State object for align benchmark. 
@@ -84,4 +85,5 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java index c0882821e9cc4..b608bb4c1c590 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java @@ -42,6 +42,7 @@ */ @State(Scope.Benchmark) public class ArrowRecordBatchBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_CAPACITY = 16 * 1024; @@ -95,4 +96,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java index 5142f4bdb8d0d..486862859f122 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java @@ -42,6 +42,7 @@ */ @State(Scope.Benchmark) public class TransferPairBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024; @@ -120,4 +121,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/pom.xml b/java/pom.xml index 16564ae828b0f..085546573596a 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 18 + 31 org.apache.arrow @@ -81,24 +81,45 @@ ${project.build.directory}/generated-sources 1.9.0 5.10.2 - 2.0.12 - 33.0.0-jre + 2.0.13 + 33.2.1-jre 4.1.108.Final 1.63.0 - 3.23.1 + 3.25.1 2.17.0 3.4.0 23.5.26 1.11.3 2 + dev/checkstyle/checkstyle.xml true 9+181-r4173-1 - 2.24.0 - 3.12.1 + 2.28.0 5.11.0 5.2.0 - 3.42.0 + 3.43.0 + + **/*.java + none + -Xdoclint:none + + 1.8 + 1.8 + 3.12.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.4 + + 3.2.2 + 3.6.3 + 3.5.0 @@ -115,6 +136,7 @@ org.checkerframework checker-qual ${checker.framework.version} + provided com.google.flatbuffers @@ -149,8 +171,8 @@ org.immutables - value - 2.10.0 + value-annotations + 2.10.1 provided @@ -268,40 +290,16 @@ 8.3.0 test - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - 3.1.2 - - - org.apache.rat - apache-rat-plugin - 0.16.1 - - - org.apache.maven.plugins - maven-resources-plugin - 3.3.1 - org.apache.maven.plugins maven-compiler-plugin - ${maven-compiler-plugin.version} + true **/module-info.java **/module-info.java false @@ -309,23 +307,13 @@ org.immutables value - 2.10.0 + 2.10.1 - - maven-enforcer-plugin - 3.4.1 - - - org.apache.maven.plugins - maven-shade-plugin - 3.5.1 - maven-surefire-plugin - 3.2.5 true true @@ -340,22 +328,9 
@@ 1048576 - - - org.junit.jupiter - junit-jupiter-engine - ${dep.junit.jupiter.version} - - - org.apache.maven.surefire - surefire-junit-platform - 3.2.5 - - maven-failsafe-plugin - 3.2.5 ${project.build.directory} @@ -367,7 +342,7 @@ org.jacoco jacoco-maven-plugin - 0.8.11 + 0.8.12 + org.immutables:value + org.checkerframework:checker @@ -679,8 +661,6 @@ pl.project13.maven git-commit-id-plugin - 4.0.5 - dd.MM.yyyy '@' HH:mm:ss z false @@ -722,11 +702,10 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.0 **/module-info.java - dev/checkstyle/checkstyle.xml - dev/checkstyle/checkstyle.license + ${checkstyle.config.location} + dev/license/asf-java.license dev/checkstyle/suppressions.xml true UTF-8 @@ -777,7 +756,6 @@ javax.annotation:javax.annotation-api:* org.apache.hadoop:hadoop-client-api - org.checkerframework:checker-qual @@ -786,7 +764,6 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.11 @@ -817,12 +794,10 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 com.diffplug.spotless @@ -831,6 +806,19 @@ + + + ${spotless.java.excludes} + + + 1.7 + + + + ${maven.multiModuleProjectDirectory}/dev/license/asf-java.license + package + + @@ -857,7 +845,6 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 **/module-info.java @@ -885,28 +872,15 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 - - java-nodoclint - - [1.8,) - - - none - -Xdoclint:none - - - arrow-c-data @@ -941,6 +915,9 @@ Do not activate Error Prone while running with Eclipse/M2E as it causes incompatibilities with other annotation processors. See https://github.com/jbosstools/m2e-apt/issues/62 for details + + Note: Maintaining error-prone version with JDK8 + See https://github.com/google/error-prone/blob/f8e33bc460be82ab22256a7ef8b979d7a2cacaba/docs/installation.md#jdk-8 --> 1.8 @@ -954,7 +931,6 @@ org.apache.maven.plugins maven-compiler-plugin - true -XDcompilePolicy=simple -Xplugin:ErrorProne @@ -964,6 +940,13 @@ com.google.errorprone error_prone_core + 2.10.0 @@ -987,12 +970,9 @@ org.apache.maven.plugins maven-compiler-plugin - 8 - 8 - UTF-8 -XDcompilePolicy=simple - -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-sources)/.* + -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-source|format/src/main/java/org/apache/arrow/flatbuf)/.* -J--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED -J--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED -J--add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED @@ -1013,6 +993,16 @@ + + + + + jdk11+ + + [11,] + + + org.apache.maven.plugins maven-surefire-plugin @@ -1020,6 +1010,13 @@ --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + org.apache.maven.plugins + maven-failsafe-plugin + + --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + @@ -1060,7 +1057,6 @@ org.jacoco jacoco-maven-plugin - 0.8.11 @@ -1106,7 +1102,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 cdata-cmake @@ -1163,7 +1158,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 jni-cpp-cmake @@ -1270,7 +1264,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 jni-cpp-cmake @@ -1360,5 +1353,4 @@ - diff --git a/java/tools/pom.xml b/java/tools/pom.xml index b1507cd301f31..53dcd51771054 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -36,7 +36,7 @@ org.immutables - value + value-annotations com.google.guava @@ -54,6 +54,11 @@ 1.3.14 test + com.fasterxml.jackson.core 
jackson-core @@ -85,7 +90,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies @@ -101,7 +105,21 @@ + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + verify + + + com.fasterxml.jackson.core:* + + + + + - diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 07af93a499907..6ff869ee21aff 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -32,7 +32,7 @@ org.immutables - value + value-annotations com.fasterxml.jackson.core @@ -76,64 +76,7 @@ - - - - true - - - false - - apache - apache - https://repo.maven.apache.org/maven2/ - - - - - - - codegen - - ${basedir}/src/main/codegen - - - - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - org.apache.drill.tools - drill-fmpp-maven-plugin - [1.0,) - - generate - - - - - false - true - - - - - - - - - - org.apache.maven.plugins @@ -163,33 +106,10 @@ - - maven-resources-plugin - - - - copy-fmpp-resources - - copy-resources - - initialize - - ${project.build.directory}/codegen - - - src/main/codegen - false - - - - - - org.apache.drill.tools drill-fmpp-maven-plugin - 1.21.1 generate-fmpp @@ -199,8 +119,8 @@ generate-sources src/main/codegen/config.fmpp - ${project.build.directory}/generated-sources - ${project.build.directory}/codegen/templates + ${project.build.directory}/generated-sources/fmpp + src/main/codegen/templates @@ -208,13 +128,6 @@ org.apache.maven.plugins maven-shade-plugin - - 3.2.4 @@ -228,10 +141,9 @@ com.google.flatbuffers:* + false true shade-format-flatbuffers - true - true com.google.flatbuffers @@ -243,7 +155,6 @@ - @@ -276,5 +187,4 @@ - diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 3cf9a968791a4..72df4779793f0 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -65,6 +65,11 @@ fields: [], complex: false }, + { + name: "Utf8View", + fields: [], + complex: false + }, { name: "LargeUtf8", fields: [], @@ -75,6 +80,11 @@ fields: [], complex: false }, + { + name: "BinaryView", + fields: [], + complex: false + }, { name: "LargeBinary", fields: [], @@ -119,6 +129,11 @@ name: "Duration", fields: [{name: "unit", type: short, valueType: TimeUnit}], complex: false + }, + { + name: "ListView", + fields: [], + complex: true } ] } diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd index 6c2a967712454..ad1f1b93bb3aa 100644 --- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd +++ b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd @@ -189,7 +189,9 @@ fields: [{name: "start", type: "int"}, {name: "end", type: "int"}, {name: "buffer", type: "ArrowBuf"}], minor: [ { class: "VarBinary" , friendlyType: "byte[]" }, - { class: "VarChar" , friendlyType: "Text" } + { class: "VarChar" , friendlyType: "Text" }, + { class: "ViewVarBinary" , friendlyType: "byte[]" }, + { class: "ViewVarChar" , friendlyType: "Text" } ] }, { diff --git a/java/vector/src/main/codegen/templates/HolderReaderImpl.java b/java/vector/src/main/codegen/templates/HolderReaderImpl.java index 8394aaad41756..1151ea5d39dda 100644 --- a/java/vector/src/main/codegen/templates/HolderReaderImpl.java +++ b/java/vector/src/main/codegen/templates/HolderReaderImpl.java @@ -109,9 +109,9 @@ public void read(Nullable${name}Holder h) { byte[] value = new byte [length]; holder.buffer.getBytes(holder.start, value, 0, length); - <#if minor.class == "VarBinary" || minor.class == "LargeVarBinary"> + <#if minor.class == 
"VarBinary" || minor.class == "LargeVarBinary" || minor.class == "ViewVarBinary"> return value; - <#elseif minor.class == "VarChar" || minor.class == "LargeVarChar"> + <#elseif minor.class == "VarChar" || minor.class == "LargeVarChar" || minor.class == "ViewVarChar"> Text text = new Text(); text.set(value); return text; diff --git a/java/vector/src/main/codegen/templates/StructWriters.java b/java/vector/src/main/codegen/templates/StructWriters.java index b6dd2b75c526a..b676173ac39d9 100644 --- a/java/vector/src/main/codegen/templates/StructWriters.java +++ b/java/vector/src/main/codegen/templates/StructWriters.java @@ -61,6 +61,7 @@ public class ${mode}StructWriter extends AbstractFieldWriter { this.initialCapacity = 0; for (Field child : container.getField().getChildren()) { MinorType minorType = Types.getMinorTypeForArrowType(child.getType()); + addVectorAsNullable = child.isNullable(); switch (minorType) { case STRUCT: struct(child.getName()); diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 5c0565ee27175..eeb964c055f71 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -26,7 +26,7 @@ import java.math.BigDecimal; <@pp.dropOutputFile /> -<#list ["List", "LargeList"] as listName> +<#list ["List", "ListView", "LargeList"] as listName> <@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" /> @@ -59,6 +59,10 @@ public class Union${listName}Writer extends AbstractFieldWriter { private static final int OFFSET_WIDTH = 4; + <#if listName = "ListView"> + private static final long SIZE_WIDTH = 4; + + public Union${listName}Writer(${listName}Vector vector) { this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); } @@ -193,6 +197,24 @@ public void endList() { setPosition(idx() + 1); listStarted = false; } + <#elseif listName == "ListView"> + @Override + public void startList() { + vector.startNewValue(idx()); + writer.setPosition(vector.getOffsetBuffer().getInt((idx()) * OFFSET_WIDTH)); + listStarted = true; + } + + @Override + public void endList() { + int sizeUptoIdx = 0; + for (int i = 0; i < idx(); i++) { + sizeUptoIdx += vector.getSizeBuffer().getInt(i * SIZE_WIDTH); + } + vector.getSizeBuffer().setInt(idx() * SIZE_WIDTH, writer.idx() - sizeUptoIdx); + setPosition(idx() + 1); + listStarted = false; + } <#else> @Override public void startList() { diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java index 822d4822987fb..243bd832255c2 100644 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -39,7 +39,7 @@ @SuppressWarnings("unused") public class UnionReader extends AbstractFieldReader { - private static final int NUM_SUPPORTED_TYPES = 46; + private static final int NUM_SUPPORTED_TYPES = 49; private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES]; public UnionVector data; diff --git a/java/vector/src/main/codegen/templates/ValueHolders.java b/java/vector/src/main/codegen/templates/ValueHolders.java index 973efd870a662..2a2bbe81b2e74 100644 --- a/java/vector/src/main/codegen/templates/ValueHolders.java +++ b/java/vector/src/main/codegen/templates/ValueHolders.java @@ -27,7 +27,6 @@ package org.apache.arrow.vector.holders; <#include "/@includes/vv_imports.ftl" /> - /** * Source code generated 
using FreeMarker template ${.template_name} */ @@ -40,11 +39,12 @@ public final class ${className} implements ValueHolder{ /** The last index (exclusive) into the Vector. **/ public int end; - + /** The Vector holding the actual values. **/ public ${minor.class}Vector vector; - + <#else> + public static final int WIDTH = ${type.width}; <#if mode.name == "Optional">public int isSet; @@ -70,10 +70,6 @@ public String toString(){ throw new UnsupportedOperationException(); } - - - - } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java b/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java index b41dbb245e8a2..5f9decbae4eac 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java @@ -20,7 +20,7 @@ import org.apache.arrow.util.Preconditions; /** - * Tuple class containing a vector and whether is was created. + * Tuple class containing a vector and whether it was created. * * @param <V> The type of vector the result is for. */ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index c456c625389ba..a6e1a71dc36bd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -674,8 +674,8 @@ private void splitAndTransferValidityBuffer(int startIndex, int length, if (target.validityBuffer != null) { target.validityBuffer.getReferenceManager().release(); } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); + ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); target.refreshValueCapacity(); } else { /* Copy data diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java index 2ef6e4bd8b374..6365493051b92 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java @@ -42,8 +42,7 @@ /** * BaseLargeVariableWidthVector is a base class providing functionality for large strings/large bytes types. */ -public abstract class BaseLargeVariableWidthVector extends BaseValueVector - implements VariableWidthVector, FieldVector, VectorDefinitionSetter { +public abstract class BaseLargeVariableWidthVector extends BaseValueVector implements VariableWidthFieldVector { private static final int DEFAULT_RECORD_BYTE_COUNT = 12; private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; private int lastValueCapacity; @@ -942,6 +941,7 @@ public void setValueCount(int valueCount) { * * @param index target index */ + @Override public void fillEmpties(int index) { handleSafe(index, emptyByteArray.length); fillHoles(index); @@ -955,6 +955,7 @@ public void fillEmpties(int index) { * * @param value desired index of last non-null element.
*/ + @Override public void setLastSet(int value) { lastSet = value; } @@ -964,6 +965,7 @@ public void setLastSet(int value) { * * @return index of the last non-null element */ + @Override public int getLastSet() { return lastSet; } @@ -1003,6 +1005,7 @@ public void setValueLengthSafe(int index, int length) { * @param index position of element to get * @return greater than 0 length for non-null element, 0 otherwise */ + @Override public int getValueLength(int index) { assert index >= 0; if (isSet(index) == 0) { @@ -1021,6 +1024,7 @@ public int getValueLength(int index) { * @param index position of the element to set * @param value array of bytes to write */ + @Override public void set(int index, byte[] value) { assert index >= 0; fillHoles(index); @@ -1037,6 +1041,7 @@ public void set(int index, byte[] value) { * @param index position of the element to set * @param value array of bytes to write */ + @Override public void setSafe(int index, byte[] value) { assert index >= 0; handleSafe(index, value.length); @@ -1055,6 +1060,7 @@ public void setSafe(int index, byte[] value) { * @param start start index in array of bytes * @param length length of data in array of bytes */ + @Override public void set(int index, byte[] value, int start, int length) { assert index >= 0; fillHoles(index); @@ -1091,6 +1097,7 @@ public void setSafe(int index, byte[] value, int start, int length) { * @param start start index in ByteBuffer * @param length length of data in ByteBuffer */ + @Override public void set(int index, ByteBuffer value, int start, int length) { assert index >= 0; fillHoles(index); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index d533629cdd44e..0412b9600b773 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -42,8 +42,7 @@ /** * BaseVariableWidthVector is a base class providing functionality for strings/bytes types. */ -public abstract class BaseVariableWidthVector extends BaseValueVector - implements VariableWidthVector, FieldVector, VectorDefinitionSetter { +public abstract class BaseVariableWidthVector extends BaseValueVector implements VariableWidthFieldVector { private static final int DEFAULT_RECORD_BYTE_COUNT = 8; private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); @@ -993,6 +992,7 @@ public void setValueCount(int valueCount) { * * @param index target index */ + @Override public void fillEmpties(int index) { handleSafe(index, emptyByteArray.length); fillHoles(index); @@ -1006,6 +1006,7 @@ public void fillEmpties(int index) { * * @param value desired index of last non-null element. 
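 * The last set index is used internally to backfill offsets for positions that were skipped before the value count is set.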
*/ + @Override public void setLastSet(int value) { lastSet = value; } @@ -1015,6 +1016,7 @@ public void setLastSet(int value) { * * @return index of the last non-null element */ + @Override public int getLastSet() { return lastSet; } @@ -1050,6 +1052,7 @@ public void setIndexDefined(int index) { * @param index position of the element to set * @param length length of the element */ + @Override public void setValueLengthSafe(int index, int length) { assert index >= 0; handleSafe(index, length); @@ -1065,6 +1068,7 @@ public void setValueLengthSafe(int index, int length) { * @param index position of element to get * @return greater than 0 length for non-null element, 0 otherwise */ + @Override public int getValueLength(int index) { assert index >= 0; if (isSet(index) == 0) { @@ -1083,6 +1087,7 @@ public int getValueLength(int index) { * @param index position of the element to set * @param value array of bytes to write */ + @Override public void set(int index, byte[] value) { assert index >= 0; fillHoles(index); @@ -1099,6 +1104,7 @@ public void set(int index, byte[] value) { * @param index position of the element to set * @param value array of bytes to write */ + @Override public void setSafe(int index, byte[] value) { assert index >= 0; handleSafe(index, value.length); @@ -1153,6 +1159,7 @@ public void setSafe(int index, byte[] value, int start, int length) { * @param start start index in ByteBuffer * @param length length of data in ByteBuffer */ + @Override public void set(int index, ByteBuffer value, int start, int length) { assert index >= 0; fillHoles(index); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java new file mode 100644 index 0000000000000..dffb4a39a9cd6 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -0,0 +1,1670 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.vector; + +import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; +import static org.apache.arrow.vector.util.DataSizeRoundingUtil.roundUpToMultipleOf16; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.memory.ReusableBuffer; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.util.TransferPair; + +/** + * BaseVariableWidthViewVector is a base class providing functionality for strings/bytes types in view format. + * + */ +public abstract class BaseVariableWidthViewVector extends BaseValueVector implements VariableWidthFieldVector { + // A single element of a view comprises 16 bytes + public static final int ELEMENT_SIZE = 16; + public static final int INITIAL_VIEW_VALUE_ALLOCATION = 4096; + private static final int INITIAL_BYTE_COUNT = INITIAL_VIEW_VALUE_ALLOCATION * ELEMENT_SIZE; + private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); + private int lastValueCapacity; + private long lastValueAllocationSizeInBytes; + + /* + * Variable Width View Vector comprises the following format + * + * Short strings, length <= 12 + * | Bytes 0-3 | Bytes 4-15 | + * |------------|---------------------------------------| + * | length | data (padded with 0) | + * |------------|---------------------------------------| + * + * Long strings, length > 12 + * | Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | + * |------------|------------|------------|-------------| + * | length | prefix | buf.index | offset | + * |------------|------------|------------|-------------| + * + * */ + // 12 byte unsigned int to track inline views + public static final int INLINE_SIZE = 12; + // The first 4 bytes of view are allocated for length + public static final int LENGTH_WIDTH = 4; + // The second 4 bytes of view are allocated for prefix width + public static final int PREFIX_WIDTH = 4; + // The third 4 bytes of view are allocated for buffer index + public static final int BUF_INDEX_WIDTH = 4; + public static final byte[] EMPTY_BYTE_ARRAY = new byte[]{}; + protected ArrowBuf validityBuffer; + // The view buffer is used to store the variable width view elements + protected ArrowBuf viewBuffer; + // The external buffer which stores the long strings + protected List<ArrowBuf> dataBuffers; + protected int initialDataBufferSize; + protected int valueCount; + protected int lastSet; + protected final Field field; + + + /** + * Constructs a new instance.
+ * + * @param field The field materialized by this vector + * @param allocator The allocator to use for creating/resizing buffers + */ + public BaseVariableWidthViewVector(Field field, final BufferAllocator allocator) { + super(allocator); + this.field = field; + lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; + lastValueCapacity = INITIAL_VIEW_VALUE_ALLOCATION; + valueCount = 0; + lastSet = -1; + validityBuffer = allocator.getEmpty(); + viewBuffer = allocator.getEmpty(); + dataBuffers = new ArrayList<>(); + } + + @Override + public String getName() { + return field.getName(); + } + + /* TODO: + * see if getNullCount() can be made faster -- O(1) + */ + + /* TODO: + * Once the entire hierarchy has been refactored, move common functions + * like getNullCount(), splitAndTransferValidityBuffer to top level + * base class BaseValueVector. + * + * Along with this, some class members (validityBuffer) can also be + * abstracted out to top level base class. + * + * Right now BaseValueVector is the top level base class for other + * vector types in ValueVector hierarchy (non-nullable) and those + * vectors have not yet been refactored/removed so moving things to + * the top class as of now is not a good idea. + */ + + /* TODO: + * Implement TransferPair functionality + * https://github.com/apache/arrow/issues/40932 + * + */ + + /** + * Get the buffer that manages the validity (NULL or NON-NULL nature) of + * elements in the vector. Consider it as a buffer for the internal bit vector + * data structure. + * + * @return buffer + */ + @Override + public ArrowBuf getValidityBuffer() { + return validityBuffer; + } + + /** + * Get the buffer that stores the data for elements in the vector. + * + * @return buffer + */ + @Override + public ArrowBuf getDataBuffer() { + return viewBuffer; + } + + /** + * Get the buffers that store the data for views in the vector. + * + * @return list of ArrowBuf + */ + public List<ArrowBuf> getDataBuffers() { + return dataBuffers; + } + + /** + * BaseVariableWidthViewVector doesn't support offset buffer. + * + * @return throws UnsupportedOperationException + */ + @Override + public ArrowBuf getOffsetBuffer() { + throw new UnsupportedOperationException("Offset buffer is not supported in BaseVariableWidthViewVector"); + } + + /** + * BaseVariableWidthViewVector doesn't support offset buffer. + * + * @return throws UnsupportedOperationException + */ + @Override + public long getOffsetBufferAddress() { + throw new UnsupportedOperationException("Offset buffer is not supported in BaseVariableWidthViewVector"); + } + + /** + * Get the memory address of the buffer that manages the validity + * (NULL or NON-NULL nature) of elements in the vector. + * + * @return starting address of the buffer + */ + @Override + public long getValidityBufferAddress() { + return validityBuffer.memoryAddress(); + } + + /** + * Get the memory address of the buffer that stores the data for elements + * in the vector. + * + * @return starting address of the buffer + */ + @Override + public long getDataBufferAddress() { + return viewBuffer.memoryAddress(); + } + + /** + * Sets the desired value capacity for the vector. This function doesn't + * allocate any memory for the vector.
+ * + * @param valueCount desired number of elements in the vector + */ + @Override + public void setInitialCapacity(int valueCount) { + final long size = (long) valueCount * ELEMENT_SIZE; + checkDataBufferSize(size); + lastValueAllocationSizeInBytes = (int) size; + lastValueCapacity = valueCount; + } + + /** + * Sets the desired value capacity for the vector. This function doesn't + * allocate any memory for the vector. + * + * @param valueCount desired number of elements in the vector + * @param density average number of bytes per variable width view element + */ + @Override + public void setInitialCapacity(int valueCount, double density) { + final long size = (long) valueCount * ELEMENT_SIZE; + initialDataBufferSize = (int) (valueCount * density); + checkDataBufferSize(size); + lastValueAllocationSizeInBytes = (int) size; + lastValueCapacity = valueCount; + } + + /** + * Get the density of this vector, i.e. the average number of bytes per element. + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final double totalListSize = getTotalValueLengthUpToIndex(valueCount); + return totalListSize / valueCount; + } + + /** + * Get the current value capacity, bounded by both the validity buffer and the view buffer. + * Note: the capacity is derived from the view buffer; the data buffers do not limit it. + * + * @return number of elements that vector can hold. + */ + @Override + public int getValueCapacity() { + final int validityCapacity = getValidityBufferValueCapacity(); + final int valueBufferCapacity = Math.max(capAtMaxInt(viewBuffer.capacity() / ELEMENT_SIZE), 0); + return Math.min(valueBufferCapacity, validityCapacity); + } + + private int getValidityBufferValueCapacity() { + return capAtMaxInt(validityBuffer.capacity() * 8); + } + + /** + * Zero out the vector and the data in associated buffers. + */ + public void zeroVector() { + initValidityBuffer(); + viewBuffer.setZero(0, viewBuffer.capacity()); + clearDataBuffers(); + } + + /* zero out the validity buffer */ + private void initValidityBuffer() { + validityBuffer.setZero(0, validityBuffer.capacity()); + } + + /** + * Reset the vector to its initial state. + * Note that this method doesn't release any memory. + */ + @Override + public void reset() { + zeroVector(); + lastSet = -1; + valueCount = 0; + } + + /** + * Close the vector and release the associated buffers. + */ + @Override + public void close() { + clear(); + } + + /** + * Same as {@link #close()}. + */ + @Override + public void clear() { + validityBuffer = releaseBuffer(validityBuffer); + viewBuffer = releaseBuffer(viewBuffer); + clearDataBuffers(); + lastSet = -1; + valueCount = 0; + } + + /** + * Release the data buffers and clear the list. + */ + public void clearDataBuffers() { + for (ArrowBuf buffer : dataBuffers) { + releaseBuffer(buffer); + } + dataBuffers.clear(); + } + + /** + * Get the inner vectors. + * + * @deprecated This API will be removed as the current implementations no longer support inner vectors. + * + * @return the inner vectors for this field as defined by the TypeLayout + */ + @Deprecated + @Override + public List<FieldVector> getFieldInnerVectors() { + throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); + } +
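A quick worked example of the capacity bookkeeping above, arithmetic only (the argument values are chosen purely for illustration):

// setInitialCapacity(4096, 8.0) records, without allocating anything yet:
//   view-buffer budget:    4096 * ELEMENT_SIZE (16) = 65536 bytes
//   initialDataBufferSize: (int) (4096 * 8.0)       = 32768 bytes
// allocateNew() later materializes the view and validity buffers; data
// buffers are created lazily, once values longer than INLINE_SIZE arrive.

+ /** + * Initialize the children in schema for this Field. This operation is a + * NO-OP for scalar types since they don't have any children. + * @param children the schema + * @throws IllegalArgumentException if children is a non-empty list for scalar types.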
+ */ + @Override + public void initializeChildrenFromFields(List<Field> children) { + if (!children.isEmpty()) { + throw new IllegalArgumentException("primitive type vector cannot have children"); + } + } + + /** + * Get the inner child vectors. + * @return list of child vectors for complex types, empty list for scalar vector types + */ + @Override + public List<FieldVector> getChildrenFromFields() { + return Collections.emptyList(); + } + + + /** + * Load the buffers of this vector with provided source buffers. + * The caller manages the source buffers and populates them before invoking + * this method. + * @param fieldNode the fieldNode indicating the value count + * @param ownBuffers the buffers for this Field (own buffers only, children not included) + */ + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) { + ArrowBuf bitBuf = ownBuffers.get(0); + ArrowBuf viewBuf = ownBuffers.get(1); + List<ArrowBuf> dataBufs = ownBuffers.subList(2, ownBuffers.size()); + + this.clear(); + + this.viewBuffer = viewBuf.getReferenceManager().retain(viewBuf, allocator); + this.validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuf, allocator); + + for (ArrowBuf dataBuf : dataBufs) { + this.dataBuffers.add(dataBuf.getReferenceManager().retain(dataBuf, allocator)); + } + + lastSet = fieldNode.getLength() - 1; + valueCount = fieldNode.getLength(); + } + + /** + * Get the buffers belonging to this vector. + * @return the inner buffers. + */ + @Override + public List<ArrowBuf> getFieldBuffers() { + List<ArrowBuf> result = new ArrayList<>(2 + dataBuffers.size()); + setReaderAndWriterIndex(); + result.add(validityBuffer); + result.add(viewBuffer); + // append data buffers + result.addAll(dataBuffers); + + return result; + } + + /** + * Set the reader and writer indexes for the inner buffers. + */ + private void setReaderAndWriterIndex() { + validityBuffer.readerIndex(0); + viewBuffer.readerIndex(0); + if (valueCount == 0) { + validityBuffer.writerIndex(0); + viewBuffer.writerIndex(0); + } else { + validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); + viewBuffer.writerIndex(valueCount * ELEMENT_SIZE); + } + } + + /** + * Same as {@link #allocateNewSafe()}. + */ + @Override + public void allocateNew() { + allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); + } + + /** + * Allocate memory for the vector. We internally use a default value count + * of 4096 to allocate memory for at least this many elements in the + * vector. See {@link #allocateNew(long, int)} for allocating memory for a specific + * number of elements in the vector. + * + * @return false if memory allocation fails, true otherwise. + */ + @Override + public boolean allocateNewSafe() { + try { + allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); + return true; + } catch (Exception e) { + return false; + } + } +
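The own-buffers contract here differs from the fixed two/three-buffer vectors: position 0 is validity, position 1 is the run of 16-byte views, and everything from position 2 onward is a variadic list of data buffers. A minimal illustrative sketch (not part of the patch) of mirroring one vector into another through that contract:

import java.util.List;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;

final class BufferContractSketch {
  // `source` and `target` stand in for two vectors of the same view type.
  static void mirror(BaseVariableWidthViewVector source, BaseVariableWidthViewVector target) {
    List<ArrowBuf> own = source.getFieldBuffers();  // [validity, views, data0, data1, ...]
    ArrowFieldNode node = new ArrowFieldNode(source.getValueCount(), source.getNullCount());
    target.loadFieldBuffers(node, own);             // retains each buffer into `target`
  }
}

+ /** + * Allocate memory for the vector to support storing at least the provided number of + * elements in the vector. This method must be called prior to using the ValueVector.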
+ * + * @param totalBytes desired total memory capacity + * @param valueCount the desired number of elements in the vector + * @throws OutOfMemoryException if memory allocation fails + */ + @Override + public void allocateNew(long totalBytes, int valueCount) { + assert totalBytes >= 0; + + checkDataBufferSize(totalBytes); + + /* we are doing a new allocation -- release the current buffers */ + clear(); + + try { + allocateBytes(totalBytes, valueCount); + } catch (Exception e) { + clear(); + throw e; + } + } + + @Override + public void allocateNew(int valueCount) { + allocateNew(lastValueAllocationSizeInBytes, valueCount); + } + + /* Check if the data buffer size is within bounds. */ + private void checkDataBufferSize(long size) { + if (size > MAX_BUFFER_SIZE || size < 0) { + throw new OversizedAllocationException("Memory required for vector " + + "is (" + size + "), which either overflows or exceeds the max allowed (" + MAX_BUFFER_SIZE + "). " + + "You could consider using LargeVarCharVector/LargeVarBinaryVector for large strings/large bytes types"); + } + } + + /* allocate the inner buffers */ + private void allocateBytes(final long valueBufferSize, final int valueCount) { + /* allocate the view buffer */ + viewBuffer = allocator.buffer(valueBufferSize); + viewBuffer.readerIndex(0); + + /* allocate the validity buffer */ + validityBuffer = allocator.buffer((valueCount + 7) / 8); + initValidityBuffer(); + + lastValueCapacity = getValueCapacity(); + lastValueAllocationSizeInBytes = capAtMaxInt(viewBuffer.capacity()); + } + + /** + * Resize the vector to increase the capacity. The internal behavior is to + * double the current value capacity. + */ + @Override + public void reAlloc() { + reallocViewBuffer(); + reallocViewDataBuffer(); + reallocValidityBuffer(); + } + + /** + * Reallocate the view buffer. The view buffer stores the views for + * VIEWVARCHAR or VIEWVARBINARY elements in the vector. The behavior is to double + * the size of the buffer. + * @throws OversizedAllocationException if the desired new size is more than + * max allowed + * @throws OutOfMemoryException if the internal memory allocation fails + */ + public void reallocViewBuffer() { + long currentViewBufferCapacity = viewBuffer.capacity(); + + long newAllocationSize = currentViewBufferCapacity * 2; + if (newAllocationSize == 0) { + if (lastValueAllocationSizeInBytes > 0) { + newAllocationSize = lastValueAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_BYTE_COUNT * 2L; + } + } + + reallocViewBuffer(newAllocationSize); + } + + /** + * Reallocate the data buffer associated with the view buffer. + */ + public void reallocViewDataBuffer() { + long currentDataBufferCapacity = 0; + if (!dataBuffers.isEmpty()) { + currentDataBufferCapacity = dataBuffers.get(dataBuffers.size() - 1).capacity(); + } + + long newAllocationSize = currentDataBufferCapacity * 2; + if (newAllocationSize == 0) { + if (lastValueAllocationSizeInBytes > 0) { + newAllocationSize = lastValueAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_BYTE_COUNT * 2L; + } + } + + reallocViewDataBuffer(newAllocationSize); + } + + /** + * Reallocate the view buffer to the given size. The view buffer stores the views for + * VIEWVARCHAR or VIEWVARBINARY elements in the vector. The actual allocated size may be larger + * than the requested one because it will round up the provided value to the nearest + * power of two.
+ * + * @param desiredAllocSize the desired new allocation size + * @throws OversizedAllocationException if the desired new size is more than + * max allowed + * @throws OutOfMemoryException if the internal memory allocation fails + */ + public void reallocViewBuffer(long desiredAllocSize) { + if (desiredAllocSize == 0) { + return; + } + long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); + assert newAllocationSize >= 1; + + checkDataBufferSize(newAllocationSize); + // each set operation writes a full 16-byte view element, so round the + // desired allocation size up to the next multiple of 16 bytes to keep + // the elements aligned. + newAllocationSize = roundUpToMultipleOf16(newAllocationSize); + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, viewBuffer, 0, viewBuffer.capacity()); + + viewBuffer.getReferenceManager().release(); + viewBuffer = newBuf; + lastValueAllocationSizeInBytes = viewBuffer.capacity(); + } + + /** + * Reallocate the data buffer for views. + * + * @param desiredAllocSize allocation size in bytes + */ + public void reallocViewDataBuffer(long desiredAllocSize) { + if (desiredAllocSize == 0) { + return; + } + + if (dataBuffers.isEmpty()) { + return; + } + + ArrowBuf currentBuf = dataBuffers.get(dataBuffers.size() - 1); + if (currentBuf.capacity() - currentBuf.writerIndex() >= desiredAllocSize) { + return; + } + + final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); + assert newAllocationSize >= 1; + + checkDataBufferSize(newAllocationSize); + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + dataBuffers.add(newBuf); + } + + /** + * Reallocate the validity buffer. + */ + public void reallocValidityBuffer() { + int targetValidityCount = capAtMaxInt((validityBuffer.capacity() * 8) * 2); + if (targetValidityCount == 0) { + if (lastValueCapacity > 0) { + targetValidityCount = lastValueCapacity; + } else { + targetValidityCount = 2 * INITIAL_VALUE_ALLOCATION; + } + } + + long validityBufferSize = computeValidityBufferSize(targetValidityCount); + + final ArrowBuf newValidityBuffer = allocator.buffer(validityBufferSize); + newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); + newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); + validityBuffer.getReferenceManager().release(); + validityBuffer = newValidityBuffer; + + lastValueCapacity = getValueCapacity(); + } + + private long computeValidityBufferSize(int valueCount) { + return (valueCount + 7) / 8; + } + + /** + * Get the size (number of bytes) of the underlying view buffer. + * @return number of bytes in the view buffer + */ + @Override + public int getByteCapacity() { + return capAtMaxInt(viewBuffer.capacity()); + } + + @Override + public int sizeOfValueBuffer() { + throw new UnsupportedOperationException("sizeOfValueBuffer is not supported for BaseVariableWidthViewVector"); + } + + /** + * Get the size (number of bytes) of the underlying elements in the view buffer. + * @return number of bytes used by data in the view buffer + */ + public int sizeOfViewBufferElements() { + if (valueCount == 0) { + return 0; + } + int totalSize = 0; + for (int i = 0; i < valueCount; i++) { + totalSize += getValueLength(i); + } + return totalSize; + } +
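To make the sizing path concrete, a small runnable sketch of the arithmetic used by reallocViewBuffer(long), reusing only the helpers this file already imports (illustrative, not part of the patch):

import static org.apache.arrow.vector.util.DataSizeRoundingUtil.roundUpToMultipleOf16;
import org.apache.arrow.memory.util.CommonUtil;

final class GrowthSketch {
  public static void main(String[] args) {
    // Snap to a power of two, then to a multiple of 16 so each 16-byte view
    // element stays aligned. For sizes >= 16 the power of two is already a
    // multiple of 16; the rounding matters for tiny requests.
    long desired = 1000;
    long size = roundUpToMultipleOf16(CommonUtil.nextPowerOfTwo(desired));
    System.out.println(size); // 1024
  }
}

+ /** + * Get the size (number of bytes) of underlying buffers used by this + * vector. + * @return size of underlying buffers.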
+ */ + @Override + public int getBufferSize() { + return getBufferSizeFor(this.valueCount); + } + + /** + * Get the potential buffer size for a particular number of records. + * @param valueCount desired number of elements in the vector + * @return estimated size of underlying buffers if the vector holds + * a given number of elements + */ + @Override + public int getBufferSizeFor(final int valueCount) { + if (valueCount == 0) { + return 0; + } + + final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); + final int viewBufferSize = valueCount * ELEMENT_SIZE; + final int dataBufferSize = getDataBufferSize(); + return validityBufferSize + viewBufferSize + dataBufferSize; + } + + private int getDataBufferSize() { + int dataBufferSize = 0; + for (ArrowBuf buf : dataBuffers) { + dataBufferSize += (int) buf.writerIndex(); + } + return dataBufferSize; + } + + /** + * Get information about how this field is materialized. + * @return the field corresponding to this vector + */ + @Override + public Field getField() { + return field; + } + + /** + * Return the underlying buffers associated with this vector. Note that this doesn't + * impact the reference counts for this buffer, so it only should be used for in-context + * access. Also note that this buffer changes regularly, thus + * external classes shouldn't hold a reference to it (unless they change it). + * @param clear Whether to clear vector before returning, the buffers will still be refcounted + * but the returned array will be the only reference to them + * @return The underlying {@link ArrowBuf buffers} that is used by this + * vector instance. + */ + @Override + public ArrowBuf[] getBuffers(boolean clear) { + final ArrowBuf[] buffers; + setReaderAndWriterIndex(); + if (getBufferSize() == 0) { + buffers = new ArrowBuf[0]; + } else { + final int dataBufferSize = dataBuffers.size(); + // validity and view buffers + final int fixedBufferSize = 2; + buffers = new ArrowBuf[fixedBufferSize + dataBufferSize]; + buffers[0] = validityBuffer; + buffers[1] = viewBuffer; + for (int i = fixedBufferSize; i < fixedBufferSize + dataBufferSize; i++) { + buffers[i] = dataBuffers.get(i - fixedBufferSize); + } + } + if (clear) { + for (final ArrowBuf buffer : buffers) { + buffer.getReferenceManager().retain(); + } + clear(); + } + return buffers; + } + + /** + * Validate the scalar values held by this vector. + */ + public void validateScalars() { + // No validation by default. + } + + /** + * Construct a transfer pair of this vector and another vector of the same type. + * @param field The field materialized by this vector. + * @param allocator allocator for the target vector + * @param callBack not used + * @return TransferPair + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(field, allocator); + } + + /** + * Construct a transfer pair of this vector and another vector of the same type. + * @param ref name of the target vector + * @param allocator allocator for the target vector + * @param callBack not used + * @return TransferPair + */ + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(ref, allocator); + } + + /** + * Construct a transfer pair of this vector and another vector of the same type. 
+ * @param allocator allocator for the target vector + * @return TransferPair + */ + @Override + public TransferPair getTransferPair(BufferAllocator allocator) { + return getTransferPair(getName(), allocator); + } + + /** + * Construct a transfer pair of this vector and another vector of the same type. + * @param ref name of the target vector + * @param allocator allocator for the target vector + * @return TransferPair + */ + @Override + public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator); + + /** + * Construct a transfer pair of this vector and another vector of the same type. + * @param field The field materialized by this vector. + * @param allocator allocator for the target vector + * @return TransferPair + */ + @Override + public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator); + + /** + * Transfer this vector's data to another vector. + * The memory associated with this vector is transferred to the allocator of target vector + * for accounting and management purposes. + * @param target destination vector for transfer + */ + public void transferTo(BaseVariableWidthViewVector target) { + compareTypes(target, "transferTo"); + target.clear(); + target.validityBuffer = transferBuffer(validityBuffer, target.allocator); + target.viewBuffer = transferBuffer(viewBuffer, target.allocator); + target.dataBuffers = new ArrayList<>(dataBuffers.size()); + for (int i = 0; i < dataBuffers.size(); i++) { + target.dataBuffers.add(transferBuffer(dataBuffers.get(i), target.allocator)); + } + + target.setLastSet(this.lastSet); + if (this.valueCount > 0) { + target.setValueCount(this.valueCount); + } + clear(); + } + + /** + * Slice this vector at desired index and length and transfer the + * corresponding data to the target vector. + * @param startIndex start position of the split in source vector. + * @param length length of the split. + * @param target destination vector + */ + public void splitAndTransferTo(int startIndex, int length, + BaseVariableWidthViewVector target) { + Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, + "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); + compareTypes(target, "splitAndTransferTo"); + target.clear(); + if (length > 0) { + splitAndTransferValidityBuffer(startIndex, length, target); + splitAndTransferViewBufferAndDataBuffer(startIndex, length, target); + target.setLastSet(length - 1); + target.setValueCount(length); + } + } + + /* allocate validity buffer */ + private void allocateValidityBuffer(final long size) { + final int curSize = (int) size; + validityBuffer = allocator.buffer(curSize); + validityBuffer.readerIndex(0); + initValidityBuffer(); + } + + /* + * Transfer the validity. 
+ */ + private void splitAndTransferValidityBuffer(int startIndex, int length, + BaseVariableWidthViewVector target) { + if (length <= 0) { + return; + } + + final int firstByteSource = BitVectorHelper.byteIndex(startIndex); + final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); + final int byteSizeTarget = getValidityBufferSizeFromCount(length); + final int offset = startIndex % 8; + + if (offset == 0) { + // slice + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); + } + final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); + return; + } + + /* Copy data + * When the first bit starts from the middle of a byte (offset != 0), + * copy data from the src BitVector. + * Each byte in the target is composed of a part of the i-th byte + * and a part of the (i+1)-th byte. + */ + target.allocateValidityBuffer(byteSizeTarget); + + for (int i = 0; i < byteSizeTarget - 1; i++) { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset); + byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset); + + target.validityBuffer.setByte(i, (b1 + b2)); + } + /* Copying the last piece is done in the following manner: + * if the source vector has 1 or more bytes remaining, we copy + * the last piece as a byte formed by shifting data + * from the current byte and the next byte. + * + * if the source vector has no more bytes remaining + * (we are at the last byte), we copy the last piece as a byte + * by shifting data from the current byte. + */ + if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, + firstByteSource + byteSizeTarget - 1, offset); + byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, + firstByteSource + byteSizeTarget, offset); + + target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); + } else { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, + firstByteSource + byteSizeTarget - 1, offset); + target.validityBuffer.setByte(byteSizeTarget - 1, b1); + } + } +
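The unaligned branch is the subtle part; a worked example with offset = 3 (byte values chosen arbitrarily, bits shown LSB-first as the validity buffer stores them):

// s0 = 0b1011_0101, s1 = 0b0110_1100, startIndex % 8 == 3
// b1 = getBitsFromCurrentByte(.., 3) = (byte) ((s0 & 0xFF) >>> 3) = 0b0001_0110
// b2 = getBitsFromNextByte(.., 3)    = (byte) (s1 << 5)           = 0b1000_0000
// target[0] = b1 + b2 = 0b1001_0110
// i.e. target bits 0-4 come from s0 bits 3-7, target bits 5-7 from s1 bits 0-2.

+ /** + * In split and transfer, the view buffer and the data buffer will be allocated. + * Then the values will be copied from the source vector to the target vector. + * Allocation and setting are preferred over transfer + * since the buf index and buf offset need to be overwritten + * when large strings are added.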
+ * @param startIndex starting index + * @param length number of elements to be copied + * @param target target vector + */ + private void splitAndTransferViewBufferAndDataBuffer(int startIndex, int length, + BaseVariableWidthViewVector target) { + if (length == 0) { + return; + } + + if (target.viewBuffer != null) { + target.viewBuffer.getReferenceManager().release(); + } + + // allocate target view buffer + target.viewBuffer = target.allocator.buffer(length * ELEMENT_SIZE); + + for (int i = startIndex; i < startIndex + length; i++) { + final int stringLength = getValueLength(i); + + // keeping track of writing index in the target view buffer + int writePosition = (i - startIndex) * ELEMENT_SIZE; + // keeping track of reading index in the source view buffer + int readPosition = i * ELEMENT_SIZE; + + // set length + target.viewBuffer.setInt(writePosition, stringLength); + + if (stringLength <= INLINE_SIZE) { + // handle inline buffer + writePosition += LENGTH_WIDTH; + readPosition += LENGTH_WIDTH; + // set data by copying the required portion from the source buffer + target.viewBuffer.setBytes(writePosition, viewBuffer, readPosition, stringLength); + } else { + // handle non-inline buffer + final int readBufIndex = viewBuffer.getInt(((long) i * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH); + final int readBufOffset = viewBuffer.getInt(((long) i * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); + final ArrowBuf dataBuf = dataBuffers.get(readBufIndex); + + // allocate data buffer + ArrowBuf currentDataBuf = target.allocateOrGetLastDataBuffer(stringLength); + final long currentOffSet = currentDataBuf.writerIndex(); + + writePosition += LENGTH_WIDTH; + readPosition += LENGTH_WIDTH; + // set prefix + target.viewBuffer.setBytes(writePosition, viewBuffer, readPosition, PREFIX_WIDTH); + writePosition += PREFIX_WIDTH; + // set buf id + target.viewBuffer.setInt(writePosition, target.dataBuffers.size() - 1); + writePosition += BUF_INDEX_WIDTH; + // set offset + target.viewBuffer.setInt(writePosition, (int) currentOffSet); + + currentDataBuf.setBytes(currentOffSet, dataBuf, readBufOffset, stringLength); + currentDataBuf.writerIndex(currentOffSet + stringLength); + } + } + } + + /*----------------------------------------------------------------* + | | + | common getters and setters | + | | + *----------------------------------------------------------------*/ + + + /** + * Get the number of elements that are null in the vector. + * + * @return the number of null elements. + */ + @Override + public int getNullCount() { + return BitVectorHelper.getNullCount(validityBuffer, valueCount); + } + + /** + * Check if the given index is within the current value capacity + * of the vector. + * + * @param index position to check + * @return true if the index is within the current value capacity + */ + public boolean isSafe(int index) { + return index < getValueCapacity(); + } + + /** + * Check if an element at given index is null. + * + * @param index position of an element + * @return true if an element at given index is null + */ + @Override + public boolean isNull(int index) { + return (isSet(index) == 0); + } + + /** + * Integer complement of {@link #isNull(int)}: reports set-ness rather than null-ness. + * + * @param index position of an element + * @return 1 if element at given index is not null, 0 otherwise + */ + public int isSet(int index) { + final int byteIndex = index >> 3; + final byte b = validityBuffer.getByte(byteIndex); + final int bitIndex = index & 7; + return (b >> bitIndex) & 0x01; + } + + /** + * Get the value count of the vector.
This will always be zero unless + * setValueCount(int) has been called prior to calling this. + * + * @return valueCount for the vector + */ + @Override + public int getValueCount() { + return valueCount; + } + + /** + * Sets the value count for the vector. + * + * @param valueCount value count + */ + @Override + public void setValueCount(int valueCount) { + assert valueCount >= 0; + this.valueCount = valueCount; + while (valueCount > getValueCapacity()) { + reallocViewBuffer(); + reallocValidityBuffer(); + } + lastSet = valueCount - 1; + setReaderAndWriterIndex(); + } + + /** + * Create holes in the vector up to the given index (exclusive). + * Holes will be created from the current last-set position in + * the vector. + * + * @param index target index + */ + @Override + public void fillEmpties(int index) { + handleSafe(index, EMPTY_BYTE_ARRAY.length); + lastSet = index - 1; + } + + /** + * Set the index of the last non-null element in the vector. + * It is important to call this method with an appropriate value + * before calling {@link #setValueCount(int)}. + * + * @param value desired index of last non-null element. + */ + @Override + public void setLastSet(int value) { + lastSet = value; + } + + /** + * Get the index of the last non-null element in the vector. + * + * @return index of the last non-null element + */ + @Override + public int getLastSet() { + return lastSet; + } + + /** + * Mark the particular position in the vector as non-null. + * + * @param index position of the element. + */ + @Override + public void setIndexDefined(int index) { + // We need to check and reallocate the validity buffer + while (index >= getValueCapacity()) { + reallocValidityBuffer(); + } + BitVectorHelper.setBit(validityBuffer, index); + } + + /** + * Sets the value length for an element. + * + * @param index position of the element to set + * @param length length of the element + */ + @Override + public void setValueLengthSafe(int index, int length) { + assert index >= 0; + handleSafe(index, length); + lastSet = index; + } + + /** + * Get the length of the element at the specified index. + * + * @param index position of an element to get + * @return length greater than 0 for a non-null element, 0 otherwise + */ + @Override + public int getValueLength(int index) { + assert index >= 0; + if (index < 0 || index >= viewBuffer.capacity() / ELEMENT_SIZE) { + throw new IndexOutOfBoundsException("Index out of bounds: " + index); + } + if (isSet(index) == 0) { + return 0; + } + return viewBuffer.getInt(((long) index * ELEMENT_SIZE)); + } + + /** + * Set the variable length element at the specified index to the supplied + * byte array. This is the same as using {@link #set(int, byte[], int, int)} + * with start as zero and length as value.length. + * + * @param index position of the element to set + * @param value array of bytes to write + */ + public void set(int index, byte[] value) { + assert index >= 0; + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value, 0, value.length); + lastSet = index; + } +
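A usage sketch of the set path (illustrative only: it assumes a concrete subclass with a (name, allocator) constructor, here called ViewVarCharVector; that subclass is not defined in this file):

import java.nio.charset.StandardCharsets;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;

final class SetSafeSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator()) {
      // Hypothetical concrete subclass of BaseVariableWidthViewVector.
      try (ViewVarCharVector v = new ViewVarCharVector("v", allocator)) {
        v.allocateNew();
        v.setSafe(0, "tiny".getBytes(StandardCharsets.UTF_8)); // <= 12 bytes: inline view
        v.setSafe(1, "definitely longer than twelve bytes".getBytes(StandardCharsets.UTF_8)); // goes to a data buffer
        v.setValueCount(2);
      }
    }
  }
}

+ /** + * Same as {@link #set(int, byte[])} except that it handles the + * case where index and length of a new element are beyond the existing + * capacity of the vector.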
+ * + * @param index position of the element to set + * @param value array of bytes to write + */ + @Override + public void setSafe(int index, byte[] value) { + assert index >= 0; + // check if the current index can be populated + handleSafe(index, value.length); + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value, 0, value.length); + lastSet = index; + } + + /** + * Set the variable length element at the specified index to the supplied + * byte array. + * + * @param index position of the element to set + * @param value array of bytes to write + * @param start start index in an array of bytes + * @param length length of data in an array of bytes + */ + public void set(int index, byte[] value, int start, int length) { + assert index >= 0; + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value, start, length); + lastSet = index; + } + + /** + * Same as {@link #set(int, byte[], int, int)} except that it handles the + * case where index and length of a new element are beyond the existing + * capacity of the vector. + * + * @param index position of the element to set + * @param value array of bytes to write + * @param start start index in an array of bytes + * @param length length of data in an array of bytes + */ + public void setSafe(int index, byte[] value, int start, int length) { + assert index >= 0; + handleSafe(index, length); + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value, start, length); + lastSet = index; + } + + /** + * Set the variable length element at the specified index to the + * content in supplied ByteBuffer. + * + * @param index position of the element to set + * @param value ByteBuffer with data + * @param start start index in ByteBuffer + * @param length length of data in ByteBuffer + */ + public void set(int index, ByteBuffer value, int start, int length) { + assert index >= 0; + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value.array(), start, length); + lastSet = index; + } + + /** + * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the + * case where index and length of a new element are beyond the existing + * capacity of the vector. + * + * @param index position of the element to set + * @param value ByteBuffer with data + * @param start start index in ByteBuffer + * @param length length of data in ByteBuffer + */ + public void setSafe(int index, ByteBuffer value, int start, int length) { + assert index >= 0; + handleSafe(index, length); + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value.array(), start, length); + lastSet = index; + } + + /** + * Set the element at the given index to null. + * + * @param index position of an element + */ + @Override + public void setNull(int index) { + // We need to check and reallocate the validity buffer + while (index >= getValueCapacity()) { + reallocValidityBuffer(); + } + BitVectorHelper.unsetBit(validityBuffer, index); + } + + /** + * Store the given value at a particular position in the vector. isSet indicates + * whether the value is NULL or not. 
+ * @param index position of the new value + * @param isSet Zero for NULL value, 1 otherwise + * @param start start position of data in buffer + * @param end end position of data in buffer + * @param buffer data buffer containing the variable width element to be stored + * in the vector + */ + public void set(int index, int isSet, int start, int end, ArrowBuf buffer) { + assert index >= 0; + final int dataLength = end - start; + BitVectorHelper.setValidityBit(validityBuffer, index, isSet); + setBytes(index, buffer, start, dataLength); + lastSet = index; + } + + /** + * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case + * when index is greater than or equal to current value capacity of the + * vector. + * @param index position of the new value + * @param isSet Zero for NULL value, 1 otherwise + * @param start start position of data in buffer + * @param end end position of data in buffer + * @param buffer data buffer containing the variable width element to be stored + * in the vector + */ + public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) { + assert index >= 0; + final int dataLength = end - start; + handleSafe(index, dataLength); + BitVectorHelper.setValidityBit(validityBuffer, index, isSet); + setBytes(index, buffer, start, dataLength); + lastSet = index; + } + + /** + * Store the given value at a particular position in the vector and mark + * the element as non-null. + * @param index position of the new value + * @param start start position of data in buffer + * @param length length of data in buffer + * @param buffer data buffer containing the variable width element to be stored + * in the vector + */ + public void set(int index, int start, int length, ArrowBuf buffer) { + assert index >= 0; + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, buffer, start, length); + lastSet = index; + } + + /** + * Same as {@link #set(int, int, int, ArrowBuf)} except that it handles the case + * when index is greater than or equal to current value capacity of the + * vector. + * @param index position of the new value + * @param start start position of data in buffer + * @param length length of data in buffer + * @param buffer data buffer containing the variable width element to be stored + * in the vector + */ + public void setSafe(int index, int start, int length, ArrowBuf buffer) { + assert index >= 0; + handleSafe(index, length); + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, buffer, start, length); + lastSet = index; + } + + + /*----------------------------------------------------------------* + | | + | helper methods for setters | + | | + *----------------------------------------------------------------*/ + + + protected ArrowBuf allocateOrGetLastDataBuffer(int length) { + long dataBufferSize; + if (initialDataBufferSize > 0) { + dataBufferSize = Math.max(initialDataBufferSize, length); + } else { + dataBufferSize = Math.max(lastValueAllocationSizeInBytes, length); + } + + if (dataBuffers.isEmpty() || dataBuffers.get(dataBuffers.size() - 1).capacity() - + dataBuffers.get(dataBuffers.size() - 1).writerIndex() < length) { + ArrowBuf newBuf = allocator.buffer(dataBufferSize); + dataBuffers.add(newBuf); + } + + return dataBuffers.get(dataBuffers.size() - 1); + } + + /** + * This method is used to create a view buffer for a variable width vector. + * It handles both inline and data buffers. + *
+ * <p>
+ * If the length of the value is less than or equal to {@link #INLINE_SIZE}, the value is stored in the viewBuffer + * directly as an inline view. + * The viewBuffer stores the length of the value followed by the value itself. + * If the length of the value is greater than {@link #INLINE_SIZE}, a new buffer is allocated and added to dataBuffers + * to hold the value. + * The viewBuffer in this case stores the length of the value, a prefix of the value, the index of the + * new buffer in dataBuffers, and the offset of the value in the new buffer. + * + * @param index The index at which the new value will be inserted. + * @param value The byte array that contains the data to be inserted. + * @param start The start index in the byte array from where the data for the new value begins. + * @param length The length of the data in the byte array that belongs to the new value. + */ + protected final void setBytes(int index, byte[] value, int start, int length) { + int writePosition = index * ELEMENT_SIZE; + + // to clear the memory segment of view being written to + // this is helpful in case of overwriting the value + viewBuffer.setZero(writePosition, ELEMENT_SIZE); + + // classify by the written length (not value.length), consistent with how readers + // decide between the inline and data-buffer representations + if (length <= INLINE_SIZE) { + // inline: store the value directly in the view + // set length + viewBuffer.setInt(writePosition, length); + writePosition += LENGTH_WIDTH; + // set data + viewBuffer.setBytes(writePosition, value, start, length); + } else { + // allocate data buffer + ArrowBuf currentBuf = allocateOrGetLastDataBuffer(length); + + // set length + viewBuffer.setInt(writePosition, length); + writePosition += LENGTH_WIDTH; + // set prefix + viewBuffer.setBytes(writePosition, value, start, PREFIX_WIDTH); + writePosition += PREFIX_WIDTH; + // set buf id + viewBuffer.setInt(writePosition, dataBuffers.size() - 1); + writePosition += BUF_INDEX_WIDTH; + // set offset + viewBuffer.setInt(writePosition, (int) currentBuf.writerIndex()); + + currentBuf.setBytes(currentBuf.writerIndex(), value, start, length); + currentBuf.writerIndex(currentBuf.writerIndex() + length); + } + } + + /** + * This method is used to create a view buffer for a variable width vector. + * Similar to {@link #setBytes(int, byte[], int, int)}. + * + * @param index The index at which the new value will be inserted. + * @param valueBuf The ArrowBuf that contains the data to be inserted. + * @param start The start index in the buffer from where the data for the new value begins. + * @param length The length of the data in the buffer that belongs to the new value.
+ */ + protected final void setBytes(int index, ArrowBuf valueBuf, int start, int length) { + int writePosition = index * ELEMENT_SIZE; + + // to clear the memory segment of view being written to + // this is helpful in case of overwriting the value + viewBuffer.setZero(writePosition, ELEMENT_SIZE); + + if (length <= INLINE_SIZE) { + // inline: store the value directly in the view + // set length + viewBuffer.setInt(writePosition, length); + writePosition += LENGTH_WIDTH; + // set data + viewBuffer.setBytes(writePosition, valueBuf, start, length); + } else { + // allocate data buffer + ArrowBuf currentBuf = allocateOrGetLastDataBuffer(length); + + // set length + viewBuffer.setInt(writePosition, length); + writePosition += LENGTH_WIDTH; + // set prefix + viewBuffer.setBytes(writePosition, valueBuf, start, PREFIX_WIDTH); + writePosition += PREFIX_WIDTH; + // set buf id + viewBuffer.setInt(writePosition, dataBuffers.size() - 1); + writePosition += BUF_INDEX_WIDTH; + // set offset + viewBuffer.setInt(writePosition, (int) currentBuf.writerIndex()); + + currentBuf.setBytes(currentBuf.writerIndex(), valueBuf, start, length); + currentBuf.writerIndex(currentBuf.writerIndex() + length); + } + } + + /** + * Get the total length of the elements before the given index (exclusive). + * @param index The index of the element in the vector. + * @return The total length of elements up to, but not including, the given index. + */ + public final int getTotalValueLengthUpToIndex(int index) { + int totalLength = 0; + // sum elements [0, index); the previous bound of index - 1 skipped the last element + for (int i = 0; i < index; i++) { + totalLength += getValueLength(i); + } + return totalLength; + } + + protected final void handleSafe(int index, int dataLength) { + final long lastSetCapacity = lastSet < 0 ? 0 : (long) index * ELEMENT_SIZE; + final long targetCapacity = roundUpToMultipleOf16(lastSetCapacity + dataLength); + // views require 16-byte-aligned buffers, so check the last written index in the + // viewBuffer and, if needed, grow to a capacity that keeps 16-byte alignment. + long writePosition = (long) index * ELEMENT_SIZE; + if (viewBuffer.capacity() <= writePosition || viewBuffer.capacity() < targetCapacity) { + /* + * Every time we want to increase the capacity of the viewBuffer, we need to make sure the new + * capacity meets 16-byte alignment. + * The targetCapacity can exceed what the views strictly need, since values longer than + * {@link #INLINE_SIZE} are written to a data buffer either way, but over-allocating the + * viewBuffer here is harmless. + */ + reallocViewBuffer(Math.max(writePosition, targetCapacity)); + } + + while (index >= getValueCapacity()) { + reallocValidityBuffer(); + } + } +
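To make handleSafe concrete, a worked trace using the constants above (the starting capacity is assumed for the example):

// handleSafe(index = 5, dataLength = 30), with viewBuffer.capacity() == 64:
//   writePosition   = 5 * ELEMENT_SIZE               =  80   (> 64, must grow)
//   lastSetCapacity = 5 * ELEMENT_SIZE               =  80
//   targetCapacity  = roundUpToMultipleOf16(80 + 30) = 112
//   reallocViewBuffer(max(80, 112) = 112)
//     nextPowerOfTwo(112) = 128; roundUpToMultipleOf16(128) = 128
// The view buffer grows to 128 bytes; the 30-byte value itself will then be
// written to a data buffer, because 30 > INLINE_SIZE.

+ /** + * Copy a cell value from a particular index in the source vector to a particular position in this + * vector.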
+ * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + */ + @Override + public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { + Preconditions.checkArgument(getMinorType() == from.getMinorType()); + if (from.isNull(fromIndex)) { + BitVectorHelper.unsetBit(validityBuffer, thisIndex); + } else { + final int viewLength = from.getDataBuffer().getInt((long) fromIndex * ELEMENT_SIZE); + BitVectorHelper.setBit(validityBuffer, thisIndex); + // read from the source position, write to this vector's position + final int start = fromIndex * ELEMENT_SIZE; + final int copyStart = thisIndex * ELEMENT_SIZE; + from.getDataBuffer().getBytes(start, viewBuffer, copyStart, ELEMENT_SIZE); + if (viewLength > INLINE_SIZE) { + final int bufIndex = from.getDataBuffer().getInt(((long) fromIndex * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH); + final int dataOffset = from.getDataBuffer().getInt(((long) fromIndex * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); + final ArrowBuf dataBuf = ((BaseVariableWidthViewVector) from).dataBuffers.get(bufIndex); + final ArrowBuf thisDataBuf = allocateOrGetLastDataBuffer(viewLength); + thisDataBuf.setBytes(thisDataBuf.writerIndex(), dataBuf, dataOffset, viewLength); + thisDataBuf.writerIndex(thisDataBuf.writerIndex() + viewLength); + // the copied view still points at the source buffers; rewrite the buf index and + // offset so the view refers to this vector's data buffer + viewBuffer.setInt((long) thisIndex * ELEMENT_SIZE + LENGTH_WIDTH + PREFIX_WIDTH, dataBuffers.size() - 1); + viewBuffer.setInt((long) thisIndex * ELEMENT_SIZE + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH, + (int) (thisDataBuf.writerIndex() - viewLength)); + } + } + lastSet = thisIndex; + } + + /** + * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the + * capacity of the vector needs to be expanded before copy. + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + */ + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + Preconditions.checkArgument(getMinorType() == from.getMinorType()); + if (from.isNull(fromIndex)) { + handleSafe(thisIndex, 0); + BitVectorHelper.unsetBit(validityBuffer, thisIndex); + } else { + final int viewLength = from.getDataBuffer().getInt((long) fromIndex * ELEMENT_SIZE); + handleSafe(thisIndex, viewLength); + BitVectorHelper.setBit(validityBuffer, thisIndex); + // read from the source position, write to this vector's position + final int start = fromIndex * ELEMENT_SIZE; + final int copyStart = thisIndex * ELEMENT_SIZE; + from.getDataBuffer().getBytes(start, viewBuffer, copyStart, ELEMENT_SIZE); + if (viewLength > INLINE_SIZE) { + final int bufIndex = from.getDataBuffer().getInt(((long) fromIndex * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH); + final int dataOffset = from.getDataBuffer().getInt(((long) fromIndex * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); + final ArrowBuf dataBuf = ((BaseVariableWidthViewVector) from).dataBuffers.get(bufIndex); + final ArrowBuf thisDataBuf = allocateOrGetLastDataBuffer(viewLength); + thisDataBuf.setBytes(thisDataBuf.writerIndex(), dataBuf, dataOffset, viewLength); + thisDataBuf.writerIndex(thisDataBuf.writerIndex() + viewLength); + // rewrite the buf index and offset to point at this vector's data buffer + viewBuffer.setInt((long) thisIndex * ELEMENT_SIZE + LENGTH_WIDTH + PREFIX_WIDTH, dataBuffers.size() - 1); + viewBuffer.setInt((long) thisIndex * ELEMENT_SIZE + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH, + (int) (thisDataBuf.writerIndex() - viewLength)); + } + } + lastSet = thisIndex; + } + + @Override + public ArrowBufPointer getDataPointer(int index) { + return getDataPointer(index, new ArrowBufPointer()); + } + + @Override + public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) { + if (isNull(index)) { + reuse.set(null, 0, 0); + } else { + int length = getValueLength(index); + // values of exactly INLINE_SIZE bytes are stored inline, so the boundary check must be inclusive + if (length <= INLINE_SIZE) { + int start = index * ELEMENT_SIZE + LENGTH_WIDTH; + reuse.set(viewBuffer, start, length); + } else { + final int bufIndex = + viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); + final int dataOffset = + viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); + ArrowBuf dataBuf = dataBuffers.get(bufIndex); +
reuse.set(dataBuf, dataOffset, length); + } + } + return reuse; + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + if (isNull(index)) { + return ArrowBufPointer.NULL_HASH_CODE; + } + final int length = getValueLength(index); + // inclusive boundary: values of exactly INLINE_SIZE bytes are inline + if (length <= INLINE_SIZE) { + int start = index * ELEMENT_SIZE + LENGTH_WIDTH; + return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, start + length); + } else { + final int bufIndex = + viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); + final int dataOffset = + viewBuffer.getInt( + ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); + ArrowBuf dataBuf = dataBuffers.get(bufIndex); + return ByteFunctionHelpers.hash(hasher, dataBuf, dataOffset, dataOffset + length); + } + } + + /** + * Retrieves the data of a variable-width element at a given index in the vector. + *
+ * <p>
+ * If the length of the data is greater than {@link #INLINE_SIZE}, the data is stored in one of the data buffers. + * The method retrieves the buffer index and data offset from the viewBuffer, and then retrieves the data from the + * corresponding buffer in the dataBuffers list. + *
+ * <p>
+ * If the length of the data is less than or equal to {@link #INLINE_SIZE}, the data is stored directly in the + * viewBuffer. + * The method retrieves the data directly from the viewBuffer. + * + * @param index position of the element in the vector + * @return byte array containing the data of the element + */ + protected byte[] getData(int index) { + final int dataLength = getValueLength(index); + byte[] result = new byte[dataLength]; + if (dataLength > INLINE_SIZE) { + // data is in the data buffer + // get buffer index + final int bufferIndex = + viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); + // get data offset + final int dataOffset = + viewBuffer.getInt( + ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); + dataBuffers.get(bufferIndex).getBytes(dataOffset, result, 0, dataLength); + } else { + // data is inline in the view buffer, right after the 4-byte length + viewBuffer.getBytes( + (long) index * ELEMENT_SIZE + LENGTH_WIDTH, result, 0, dataLength); + } + return result; + } + + protected void getData(int index, ReusableBuffer buffer) { + final int dataLength = getValueLength(index); + if (dataLength > INLINE_SIZE) { + // data is in the data buffer + // get buffer index + final int bufferIndex = + viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); + // get data offset + final int dataOffset = + viewBuffer.getInt( + ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); + ArrowBuf dataBuf = dataBuffers.get(bufferIndex); + buffer.set(dataBuf, dataOffset, dataLength); + } else { + // data is inline in the view buffer, right after the 4-byte length + buffer.set(viewBuffer, ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH, dataLength); + } + } + + @Override + public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) { + return visitor.visit(this, value); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java index 9725693348a48..4eeb92a0c9199 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java @@ -28,12 +28,18 @@ public class BufferLayout { /** * Enumeration of the different logical types a buffer can have. + * The data buffer is common to most of the layouts. + * The offset buffer is used for variable-width types. + * The validity buffer is used for nullable types. + * The type buffer is used for Union types. + * The size buffer is used for ListView and LargeListView types. */ public enum BufferType { DATA("DATA"), OFFSET("OFFSET"), VALIDITY("VALIDITY"), - TYPE("TYPE_ID"); + TYPE("TYPE_ID"), + SIZE("SIZE"); private final String name; @@ -57,6 +63,7 @@ public String getName() { private static final BufferLayout VALUES_32 = new BufferLayout(BufferType.DATA, 32); private static final BufferLayout VALUES_16 = new BufferLayout(BufferType.DATA, 16); private static final BufferLayout VALUES_8 = new BufferLayout(BufferType.DATA, 8); + private static final BufferLayout SIZE_BUFFER = new BufferLayout(BufferType.SIZE, 32); public static BufferLayout typeBuffer() { return TYPE_BUFFER; @@ -70,6 +77,10 @@ public static BufferLayout largeOffsetBuffer() { return LARGE_OFFSET_BUFFER; } + public static BufferLayout sizeBuffer() { + return SIZE_BUFFER; + } + /** * Returns a databuffer for the given bitwidth. Only supports powers of two between 8 and 128 * inclusive.
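Taken together with the TypeLayout hunks below, these changes teach the layout machinery two new shapes: ListView carries a third (SIZE) buffer, while the view types report only their two fixed buffers, because the variadic data buffers are not part of the static layout. A sketch of how a caller would observe this (illustrative; it assumes the generated ArrowType singletons such as Utf8View.INSTANCE and ListView.INSTANCE follow the existing pattern of Utf8.INSTANCE):

import org.apache.arrow.vector.TypeLayout;
import org.apache.arrow.vector.types.pojo.ArrowType;

final class LayoutSketch {
  public static void main(String[] args) {
    // Fixed buffer counts per type; variadic view data buffers are excluded.
    System.out.println(TypeLayout.getTypeBufferCount(ArrowType.Utf8.INSTANCE));     // 3: validity, offset, data
    System.out.println(TypeLayout.getTypeBufferCount(ArrowType.Utf8View.INSTANCE)); // 2: validity, views
    System.out.println(TypeLayout.getTypeBufferCount(ArrowType.ListView.INSTANCE)); // 3: validity, offset, size
  }
}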
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java index 8560ba3a68b04..25c83260ef3ed 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java @@ -119,6 +119,7 @@ public byte[] get(int index) { * @param index position of element. * @param buffer the buffer to write into. */ + @Override public void read(int index, ReusableBuffer buffer) { final long startOffset = getStartOffset(index); final long dataLength = getEndOffset(index) - startOffset; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java index df424c87488a0..bc3a1e09aaa79 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java @@ -103,6 +103,7 @@ public Types.MinorType getMinorType() { * @param index position of element to get * @return array of bytes for non-null element, null otherwise */ + @Override public byte[] get(int index) { assert index >= 0; if (isSet(index) == 0) { @@ -140,6 +141,7 @@ public Text getObject(int index) { * @param index position of element. * @param buffer the buffer to write into. */ + @Override public void read(int index, ReusableBuffer buffer) { final long startOffset = getStartOffset(index); final long dataLength = getEndOffset(index) - startOffset; @@ -298,7 +300,7 @@ public void validateScalars() { *----------------------------------------------------------------*/ /** - * Construct a TransferPair comprising of this and a target vector of + * Construct a TransferPair comprising this and a target vector of * the same type. * * @param ref name of the target vector diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index ae465418cf2fd..0d01d77632bde 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -28,6 +28,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor; import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.ArrowType.BinaryView; import org.apache.arrow.vector.types.pojo.ArrowType.Bool; import org.apache.arrow.vector.types.pojo.ArrowType.Date; import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; @@ -46,6 +47,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; +import org.apache.arrow.vector.types.pojo.ArrowType.Utf8View; /** * The buffer layout of vectors for a given type. 
@@ -100,7 +102,7 @@ public TypeLayout visit(Timestamp type) { } @Override - public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + public TypeLayout visit(ArrowType.List type) { List<BufferLayout> vectors = asList( BufferLayout.validityVector(), BufferLayout.offsetBuffer() @@ -108,6 +110,16 @@ public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) return new TypeLayout(vectors); } + @Override + public TypeLayout visit(ArrowType.ListView type) { + List<BufferLayout> vectors = asList( + BufferLayout.validityVector(), + BufferLayout.offsetBuffer(), + BufferLayout.sizeBuffer() + ); + return new TypeLayout(vectors); + } + @Override public TypeLayout visit(ArrowType.LargeList type) { + List<BufferLayout> vectors = asList( @@ -173,11 +185,21 @@ public TypeLayout visit(Binary type) { return newVariableWidthTypeLayout(); } + @Override + public TypeLayout visit(ArrowType.BinaryView type) { + return newVariableWidthViewTypeLayout(); + } + @Override public TypeLayout visit(Utf8 type) { return newVariableWidthTypeLayout(); } + @Override + public TypeLayout visit(Utf8View type) { + return newVariableWidthViewTypeLayout(); + } + @Override public TypeLayout visit(LargeUtf8 type) { return newLargeVariableWidthTypeLayout(); @@ -193,7 +215,12 @@ private TypeLayout newVariableWidthTypeLayout() { BufferLayout.byteVector()); } + private TypeLayout newVariableWidthViewTypeLayout() { + return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.byteVector()); + } + private TypeLayout newLargeVariableWidthTypeLayout() { + // NOTE: only considers the non-variadic buffers return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.largeOffsetBuffer(), BufferLayout.byteVector()); } @@ -299,11 +326,17 @@ public Integer visit(Timestamp type) { } @Override - public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + public Integer visit(ArrowType.List type) { // validity buffer + offset buffer return 2; } + @Override + public Integer visit(ArrowType.ListView type) { + // validity buffer + offset buffer + size buffer + return 3; + } + @Override public Integer visit(ArrowType.LargeList type) { // validity buffer + offset buffer @@ -347,11 +380,23 @@ public Integer visit(Binary type) { return VARIABLE_WIDTH_BUFFER_COUNT; } + @Override + public Integer visit(BinaryView type) { + // NOTE: only consider the validity and view buffers + return 2; + } + @Override public Integer visit(Utf8 type) { return VARIABLE_WIDTH_BUFFER_COUNT; } + @Override + public Integer visit(Utf8View type) { + // NOTE: only consider the validity and view buffers + return 2; + } + @Override public Integer visit(LargeUtf8 type) { return VARIABLE_WIDTH_BUFFER_COUNT; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java index ab67ebad965aa..82d4feda9a991 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java @@ -120,6 +120,7 @@ public byte[] get(int index) { * @param index position of element. * @param buffer the buffer to write into.
*/ + @Override public void read(int index, ReusableBuffer buffer) { final int startOffset = getStartOffset(index); final int dataLength = getEndOffset(index) - startOffset; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java index c6d5a7090bc6f..fde9459e60084 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java @@ -137,6 +137,7 @@ public Text getObject(int index) { * @param index position of element. * @param buffer the buffer to write into. */ + @Override public void read(int index, ReusableBuffer buffer) { final int startOffset = getStartOffset(index); final int dataLength = getEndOffset(index) - startOffset; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthFieldVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthFieldVector.java new file mode 100644 index 0000000000000..58b6940a81a14 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthFieldVector.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import java.nio.ByteBuffer; + +import org.apache.arrow.memory.ReusableBuffer; + +/** +* A base interface for common functionalities in variable width vectors. +*/ +public interface VariableWidthFieldVector extends VariableWidthVector, FieldVector, VectorDefinitionSetter { + + /** + * Set the variable length element at the specified index to the supplied byte array. + * + * @param index position of the element to set + * @param value array of bytes with data + */ + void set(int index, byte[] value); + + /** + * Set the variable length element at the specified index to the supplied byte array. + * + * @param index position of the element to set + * @param value array of bytes with data + * @param start start position in the array + * @param length length of the data to write + */ + void set(int index, byte[] value, int start, int length); + + /** + * Set the variable length element at the specified index to the supplied ByteBuffer. + * + * @param index position of the element to set + * @param value ByteBuffer with data + * @param start start position in the ByteBuffer + * @param length length of the data to write + */ + void set(int index, ByteBuffer value, int start, int length); + + /** + * Set the variable length element at the specified index to the supplied byte array, and it + * handles the case where index and length of a new element are beyond the existing capacity of the + * vector. 
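The VariableWidthFieldVector interface being introduced here (its remaining methods follow) lets offset-based and view-based variable-width vectors be handled through one type. A minimal sketch of the generic code this enables, assuming, as the @Override additions elsewhere in this patch suggest, that VarCharVector, LargeVarCharVector, and ViewVarCharVector all implement it:

    // Copy all non-null values from one variable-width vector to another,
    // regardless of whether the source is offset-based or view-based.
    static void copyNonNull(VariableWidthFieldVector from, VariableWidthFieldVector to) {
      int n = from.getValueCount();  // inherited from ValueVector via FieldVector
      for (int i = 0; i < n; i++) {
        byte[] v = from.get(i);      // null for a null slot
        if (v != null) {
          to.setSafe(i, v);          // grows buffers as needed and marks the slot set
        }
      }
      to.setValueCount(n);
    }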
+ * + * @param index position of the element to set + * @param value array of bytes to write + */ + void setSafe(int index, byte[] value); + + /** + * Set the variable length element at the specified index to the supplied byte array, and it + * handles the case where index and length of a new element are beyond the existing capacity. + * + * @param index position of the element to set + * @param value array of bytes with data + * @param start start position in the array + * @param length length of the data to write + */ + void setSafe(int index, byte[] value, int start, int length); + + /** + * Set the variable length element at the specified index to the supplied ByteBuffer, and it + * handles the case where index and length of a new element are beyond the existing capacity. + * + * @param index position of the element to set + * @param value ByteBuffer with data + * @param start start position in the ByteBuffer + * @param length length of the data to write + */ + void setSafe(int index, ByteBuffer value, int start, int length); + + /** + * Get the variable length element at the specified index. + * + * @param index position of the element to get + * @return byte array with the data + */ + byte[] get(int index); + + /** + * Get the variable length element at the specified index using a ReusableBuffer. + * + * @param index position of the element to get + * @param buffer ReusableBuffer to write the data to + */ + void read(int index, ReusableBuffer buffer); + + /** + * Get the index of the last non-null element in the vector. + * + * @return index of the last non-null element + */ + int getLastSet(); + + /** + * Set the index of the last non-null element in the vector. + * + * @param value desired index of last non-null element + */ + void setLastSet(int value); + + /** + * Get the length of the variable length element at the specified index. + * + * @param index position of an element to get + * @return length greater than 0 for a non-null element, 0 otherwise + */ + int getValueLength(int index); + + /** + * Create holes in the vector up to the given index (exclusive). + * Holes will be created from the current last-set position in + * the vector. + * + * @param index target index + */ + void fillEmpties(int index); + + /** + * Sets the value length for an element. + * + * @param index position of the element to set + * @param length length of the element + */ + void setValueLengthSafe(int index, int length); +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 510cef24c7e16..dec536ae6cc1f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -20,6 +20,7 @@ import static org.apache.arrow.util.Preconditions.checkArgument; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -80,13 +81,19 @@ public void load(ArrowRecordBatch recordBatch) { CompressionUtil.CodecType.fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ?
factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; + Iterator<Long> variadicBufferCounts = Collections.emptyIterator(); + if (recordBatch.getVariadicBufferCounts() != null && !recordBatch.getVariadicBufferCounts().isEmpty()) { + variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); + } + for (FieldVector fieldVector : root.getFieldVectors()) { - loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec); + loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); } root.setRowCount(recordBatch.getLength()); - if (nodes.hasNext() || buffers.hasNext()) { - throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: " + - Collections2.toString(nodes) + " buffers: " + Collections2.toString(buffers)); + if (nodes.hasNext() || buffers.hasNext() || variadicBufferCounts.hasNext()) { + throw new IllegalArgumentException("not all nodes, buffers and variadicBufferCounts were consumed. nodes: " + + Collections2.toString(nodes) + " buffers: " + Collections2.toString(buffers) + " variadicBufferCounts: " + + Collections2.toString(variadicBufferCounts)); + } } @@ -95,10 +102,20 @@ private void loadBuffers( Field field, Iterator<ArrowBuf> buffers, Iterator<ArrowFieldNode> nodes, - CompressionCodec codec) { + CompressionCodec codec, + Iterator<Long> variadicBufferCounts) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); + // variadicBufferLayoutCount will be 0 for all vector types except BaseVariableWidthViewVector + long variadicBufferLayoutCount = 0; + if (vector instanceof BaseVariableWidthViewVector) { + if (variadicBufferCounts.hasNext()) { + variadicBufferLayoutCount = variadicBufferCounts.next(); + } else { + throw new IllegalStateException("No variadicBufferCounts available for BaseVariableWidthViewVector"); + } + } + int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); List<ArrowBuf> ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); @@ -130,7 +147,7 @@ private void loadBuffers( for (int i = 0; i < childrenFromFields.size(); i++) { Field child = children.get(i); FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes, codec); + loadBuffers(fieldVector, child, buffers, nodes, codec, variadicBufferCounts); } } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java index 8768a90c80b83..9a92ce5060b1b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java @@ -19,6 +19,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -121,7 +122,7 @@ public VectorSchemaRoot(Schema schema, List fieldVectors, int rowCo * Creates a new set of empty vectors corresponding to the given schema.
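To see the variadic-count bookkeeping above end to end, here is a hedged round-trip sketch. ArrowType.Utf8View.INSTANCE is assumed as before, and note that the unloader (further below) only records a count for vectors that actually own variadic data buffers, so the sketch stores a value longer than 12 bytes to force one:

    import java.nio.charset.StandardCharsets;
    import java.util.Collections;

    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.VectorLoader;
    import org.apache.arrow.vector.VectorSchemaRoot;
    import org.apache.arrow.vector.VectorUnloader;
    import org.apache.arrow.vector.ViewVarCharVector;
    import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
    import org.apache.arrow.vector.types.pojo.ArrowType;
    import org.apache.arrow.vector.types.pojo.Field;
    import org.apache.arrow.vector.types.pojo.Schema;

    public class ViewRoundTrip {
      public static void main(String[] args) {
        Schema schema = new Schema(Collections.singletonList(
            Field.nullable("s", ArrowType.Utf8View.INSTANCE))); // assumed constant
        try (BufferAllocator allocator = new RootAllocator();
             VectorSchemaRoot source = VectorSchemaRoot.create(schema, allocator);
             VectorSchemaRoot target = VectorSchemaRoot.create(schema, allocator)) {
          ViewVarCharVector s = (ViewVarCharVector) source.getVector("s");
          s.setSafe(0, "a value longer than twelve bytes".getBytes(StandardCharsets.UTF_8));
          source.setRowCount(1);
          // The batch now carries one variadic-buffer count for "s" ...
          try (ArrowRecordBatch batch = new VectorUnloader(source).getRecordBatch()) {
            // ... which the loader consumes while rebuilding the target's vectors.
            new VectorLoader(target).load(batch);
          }
        }
      }
    }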
*/ public static VectorSchemaRoot create(Schema schema, BufferAllocator allocator) { - List fieldVectors = new ArrayList<>(); + List fieldVectors = new ArrayList<>(schema.getFields().size()); for (Field field : schema.getFields()) { FieldVector vector = field.createVector(allocator); fieldVectors.add(vector); @@ -160,7 +161,7 @@ public void clear() { } public List getFieldVectors() { - return fieldVectors.stream().collect(Collectors.toList()); + return Collections.unmodifiableList(fieldVectors); } /** @@ -236,7 +237,7 @@ public int getRowCount() { */ public void setRowCount(int rowCount) { this.rowCount = rowCount; - for (FieldVector v : getFieldVectors()) { + for (FieldVector v : fieldVectors) { v.setValueCount(rowCount); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java index 1d44e37ac71af..6e7ab34eba9de 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -80,19 +80,33 @@ public VectorUnloader( public ArrowRecordBatch getRecordBatch() { List nodes = new ArrayList<>(); List buffers = new ArrayList<>(); + List variadicBufferCounts = new ArrayList<>(); for (FieldVector vector : root.getFieldVectors()) { - appendNodes(vector, nodes, buffers); + appendNodes(vector, nodes, buffers, variadicBufferCounts); } // Do NOT retain buffers in ArrowRecordBatch constructor since we have already retained them. return new ArrowRecordBatch( - root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), alignBuffers, - /*retainBuffers*/ false); + root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), + variadicBufferCounts, alignBuffers, /*retainBuffers*/ false); } - private void appendNodes(FieldVector vector, List nodes, List buffers) { + private long getVariadicBufferCount(FieldVector vector) { + if (vector instanceof BaseVariableWidthViewVector) { + return ((BaseVariableWidthViewVector) vector).getDataBuffers().size(); + } + return 0L; + } + + private void appendNodes(FieldVector vector, List nodes, List buffers, + List variadicBufferCounts) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); + long variadicBufferCount = getVariadicBufferCount(vector); + int expectedBufferCount = (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); + // only update variadicBufferCounts for vectors that have variadic buffers + if (variadicBufferCount > 0) { + variadicBufferCounts.add(variadicBufferCount); + } if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format( "wrong number of buffers for field %s in vector %s. found: %s", @@ -107,7 +121,7 @@ private void appendNodes(FieldVector vector, List nodes, List= 0; + if (NULL_CHECKING_ENABLED && isSet(index) == 0) { + return null; + } + return getData(index); + } + + /** + * Read the value at the given position to the given output buffer. The caller is responsible for + * checking for nullity first. + * + * @param index position of an element. + * @param buffer the buffer to write into. 
+ */ + @Override + public void read(int index, ReusableBuffer buffer) { + getData(index, buffer); + } + + /** + * Get the variable length element at a specified index as a byte array. + * + * @param index position of an element to get + * @return byte array for a non-null element, null otherwise + */ + @Override + public byte[] getObject(int index) { + return get(index); + } + + /** + * Get the variable length element at specified index and sets the state in provided holder. + * + * @param index position of an element to get + * @param holder data holder to be populated by this function + */ + public void get(int index, NullableViewVarBinaryHolder holder) { + // TODO: https://github.com/apache/arrow/issues/40936 + throw new UnsupportedOperationException("Unsupported operation"); + } + + /*----------------------------------------------------------------* + | | + | vector value setter methods | + | | + *----------------------------------------------------------------*/ + + /** + * Set the variable length element at the specified index to the data buffer supplied in the + * holder. + * + * @param index position of the element to set + * @param holder holder that carries data buffer. + */ + public void set(int index, ViewVarBinaryHolder holder) { + // TODO: https://github.com/apache/arrow/issues/40936 + throw new UnsupportedOperationException("Unsupported operation"); + } + + /** + * Same as {@link #set(int, ViewVarBinaryHolder)} except that it handles the case where index and + * length of a new element are beyond the existing capacity of the vector. + * + * @param index position of the element to set + * @param holder holder that carries data buffer. + */ + public void setSafe(int index, ViewVarBinaryHolder holder) { + // TODO: https://github.com/apache/arrow/issues/40936 + throw new UnsupportedOperationException("Unsupported operation"); + } + + /** + * Set the variable length element at the specified index to the data buffer supplied in the + * holder. + * + * @param index position of the element to set + * @param holder holder that carries data buffer. + */ + public void set(int index, NullableViewVarBinaryHolder holder) { + // TODO: https://github.com/apache/arrow/issues/40936 + throw new UnsupportedOperationException("Unsupported operation"); + } + + /** + * Same as {@link #set(int, NullableViewVarBinaryHolder)} except that it handles the case where index + * and length of a new element are beyond the existing capacity of the vector. + * + * @param index position of the element to set + * @param holder holder that carries data buffer. + */ + public void setSafe(int index, NullableViewVarBinaryHolder holder) { + // TODO: https://github.com/apache/arrow/issues/40936 + throw new UnsupportedOperationException("Unsupported operation"); + } + + /*----------------------------------------------------------------* + | | + | vector transfer | + | | + *----------------------------------------------------------------*/ + + /** + * Construct a TransferPair comprising this and a target vector of the same type. 
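A small usage note for the transfer machinery defined here, as a hedged sketch; the (name, allocator) constructor and setSafe(int, byte[]) are assumed to exist, mirroring ViewVarCharVector and the base class hunks not shown in this excerpt:

    // Hand the view and data buffers of one vector to another without copying.
    try (BufferAllocator allocator = new RootAllocator();
         ViewVarBinaryVector src = new ViewVarBinaryVector("src", allocator)) { // assumed ctor
      src.setSafe(0, new byte[] {1, 2, 3}); // assumed from BaseVariableWidthViewVector
      src.setValueCount(1);
      TransferPair tp = src.getTransferPair("dst", allocator);
      tp.transfer(); // src is emptied; the target now owns the buffers
      try (ViewVarBinaryVector dst = (ViewVarBinaryVector) tp.getTo()) {
        byte[] v = dst.get(0); // {1, 2, 3}
      }
    }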
+ * + * @param ref name of the target vector + * @param allocator allocator for the target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return new TransferImpl(ref, allocator); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(field, allocator); + } + + /** + * Construct a TransferPair with a desired target vector of the same type. + * + * @param to target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((ViewVarBinaryVector) to); + } + + private class TransferImpl implements TransferPair { + ViewVarBinaryVector to; + + public TransferImpl(String ref, BufferAllocator allocator) { + to = new ViewVarBinaryVector(ref, field.getFieldType(), allocator); + } + + public TransferImpl(Field field, BufferAllocator allocator) { + to = new ViewVarBinaryVector(field, allocator); + } + + public TransferImpl(ViewVarBinaryVector to) { + this.to = to; + } + + @Override + public ViewVarBinaryVector getTo() { + return to; + } + + @Override + public void transfer() { + transferTo(to); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + splitAndTransferTo(startIndex, length, to); + } + + @Override + public void copyValueSafe(int fromIndex, int toIndex) { + to.copyFromSafe(fromIndex, toIndex, ViewVarBinaryVector.this); + } + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java new file mode 100644 index 0000000000000..400f8cb1fc2e0 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.ReusableBuffer; +import org.apache.arrow.vector.complex.impl.ViewVarCharReaderImpl; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.holders.NullableViewVarCharHolder; +import org.apache.arrow.vector.holders.ViewVarCharHolder; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.Text; +import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.validate.ValidateUtil; + +/** + * ViewVarCharVector implements a view of a variable width vector of VARCHAR + * values which could be NULL. 
A validity buffer (bit vector) is maintained + * to track which elements in the vector are null. A viewBuffer keeps track + * of all values in the vector, and external data buffers hold the bodies of + * longer strings (longer than 12 bytes). + */ +public final class ViewVarCharVector extends BaseVariableWidthViewVector { + + /** + * Instantiate a ViewVarCharVector. This doesn't allocate any memory for + * the data in the vector. + * @param name name of the vector + * @param allocator allocator for memory management. + */ + public ViewVarCharVector(String name, BufferAllocator allocator) { + this(name, FieldType.nullable(MinorType.VIEWVARCHAR.getType()), allocator); + } + + /** + * Instantiate a ViewVarCharVector. This doesn't allocate any memory for + * the data in the vector. + * @param name name of the vector + * @param fieldType type of Field materialized by this vector + * @param allocator allocator for memory management. + */ + public ViewVarCharVector(String name, FieldType fieldType, BufferAllocator allocator) { + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a ViewVarCharVector. This doesn't allocate any memory for + * the data in the vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public ViewVarCharVector(Field field, BufferAllocator allocator) { + super(field, allocator); + } + + @Override + protected FieldReader getReaderImpl() { + return new ViewVarCharReaderImpl(ViewVarCharVector.this); + } + + /** + * Get the minor type for this vector. The vector holds values belonging + * to a particular type. + * @return {@link org.apache.arrow.vector.types.Types.MinorType} + */ + @Override + public MinorType getMinorType() { + return MinorType.VIEWVARCHAR; + } + + /*----------------------------------------------------------------* + | | + | vector value retrieval methods | + | | + *----------------------------------------------------------------*/ + + /** + * Get the variable length element at specified index as a byte array. + * + * @param index position of an element to get + * @return array of bytes for a non-null element, null otherwise + */ + public byte[] get(int index) { + assert index >= 0; + if (NULL_CHECKING_ENABLED && isSet(index) == 0) { + return null; + } + return getData(index); + } + + /** + * Get the variable length element at specified index as Text. + * + * @param index position of an element to get + * @return Text object for a non-null element, null otherwise + */ + @Override + public Text getObject(int index) { + assert index >= 0; + if (NULL_CHECKING_ENABLED && isSet(index) == 0) { + return null; + } + + final Text result = new Text(); + read(index, result); + return result; + } + + /** + * Read the value at the given position to the given output buffer. + * The caller is responsible for checking for nullity first. + * + * @param index position of an element. + * @param buffer the buffer to write into. + */ + @Override + public void read(int index, ReusableBuffer buffer) { + getData(index, buffer); + } + + /** + * Get the variable length element at specified index and sets the state + * in provided holder.
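A short usage sketch of the class being added here; setSafe(int, byte[]) and setNull(int) are assumed to come from BaseVariableWidthViewVector, whose hunks are not shown in this excerpt:

    try (BufferAllocator allocator = new RootAllocator();
         ViewVarCharVector v = new ViewVarCharVector("v", allocator)) {
      v.allocateNew();
      v.setSafe(0, "short".getBytes(StandardCharsets.UTF_8)); // 5 bytes: inlined in the view
      v.setSafe(1, "a value longer than twelve bytes"
          .getBytes(StandardCharsets.UTF_8));                 // spills into a data buffer
      v.setNull(2);                                           // assumed from the base class
      v.setValueCount(3);
      Text first = v.getObject(0);  // "short"
      byte[] second = v.get(1);     // fetched via (buffer index, offset) in the view
      boolean nullThird = v.isNull(2);
    }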
+ * + * @param index position of an element to get + * @param holder data holder to be populated by this function + */ + public void get(int index, NullableViewVarCharHolder holder) { + // TODO: https://github.com/apache/arrow/issues/40937 + throw new UnsupportedOperationException("NullableViewVarCharHolder get operation not supported"); + } + + + /*----------------------------------------------------------------* + | | + | vector value setter methods | + | | + *----------------------------------------------------------------*/ + + + /** + * Set the variable length element at the specified index to the data + * buffer supplied in the holder. + * + * @param index position of the element to set + * @param holder holder that carries data buffer. + */ + public void set(int index, ViewVarCharHolder holder) { + // TODO: https://github.com/apache/arrow/issues/40937 + throw new UnsupportedOperationException("ViewVarCharHolder set operation not supported"); + } + + /** + * Same as {@link #set(int, ViewVarCharHolder)} except that it handles the + * case where index and length of a new element are beyond the existing + * capacity of the vector. + * + * @param index position of the element to set + * @param holder holder that carries data buffer. + */ + public void setSafe(int index, ViewVarCharHolder holder) { + // TODO: https://github.com/apache/arrow/issues/40937 + throw new UnsupportedOperationException("ViewVarCharHolder setSafe operation not supported"); + } + + /** + * Set the variable length element at the specified index to the data + * buffer supplied in the holder. + * + * @param index position of the element to set + * @param holder holder that carries data buffer. + */ + public void set(int index, NullableViewVarCharHolder holder) { + // TODO: https://github.com/apache/arrow/issues/40937 + throw new UnsupportedOperationException("NullableViewVarCharHolder set operation not supported"); + } + + /** + * Same as {@link #set(int, NullableViewVarCharHolder)} except that it handles the + * case where index and length of a new element are beyond the existing + * capacity of the vector. + * + * @param index position of the element to set + * @param holder holder that carries data buffer. + */ + public void setSafe(int index, NullableViewVarCharHolder holder) { + // TODO: https://github.com/apache/arrow/issues/40937 + throw new UnsupportedOperationException("NullableViewVarCharHolder setSafe operation not supported"); + } + + /** + * Set the variable length element at the specified index to the + * content in supplied Text. + * + * @param index position of the element to set + * @param text Text object with data + */ + public void set(int index, Text text) { + set(index, text.getBytes(), 0, (int) text.getLength()); + } + + /** + * Same as {@link #set(int, Text)} except that it handles the + * case where index and length of a new element are beyond the existing + * capacity of the vector. + * + * @param index position of the element to set. + * @param text Text object with data + */ + public void setSafe(int index, Text text) { + setSafe(index, text.getBytes(), 0, (int) text.getLength()); + } + + @Override + public void validateScalars() { + for (int i = 0; i < getValueCount(); ++i) { + byte[] value = get(i); + if (value != null) { + ValidateUtil.validateOrThrow(Text.validateUTF8NoThrow(value), + "Non-UTF-8 data in ViewVarCharVector at position " + i + "."); + } + } + } + + /*----------------------------------------------------------------* + | | + | vector transfer | + | | + *----------------------------------------------------------------*/ + + /** + * Construct a TransferPair comprising this and a target vector of the same type. + * + * @param ref name of the target vector + * @param allocator allocator for the target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return new TransferImpl(ref, allocator); + } + + /** + * Construct a TransferPair with a desired target vector of the same type. + * + * @param field The field materialized by this vector. + * @param allocator allocator for the target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(field, allocator); + } + + /** + * Construct a TransferPair with a desired target vector of the same type. + * + * @param to the target for the transfer + * @return {@link TransferPair} + */ + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((ViewVarCharVector) to); + } + + private class TransferImpl implements TransferPair { + ViewVarCharVector to; + + public TransferImpl(String ref, BufferAllocator allocator) { + to = new ViewVarCharVector(ref, field.getFieldType(), allocator); + } + + public TransferImpl(Field field, BufferAllocator allocator) { + to = new ViewVarCharVector(field, allocator); + } + + public TransferImpl(ViewVarCharVector to) { + this.to = to; + } + + @Override + public ViewVarCharVector getTo() { + return to; + } + + @Override + public void transfer() { + transferTo(to); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + splitAndTransferTo(startIndex, length, to); + } + + @Override + public void copyValueSafe(int fromIndex, int toIndex) { + to.copyFromSafe(fromIndex, toIndex, ViewVarCharVector.this); + } + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index 5323ddda838c8..28da2a86a53c8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -22,11 +22,13 @@ import java.util.List; import java.util.function.BiFunction; +import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.NullVector; @@ -162,6 +164,14 @@
public Boolean visit(BaseLargeVariableWidthVector left, Range range) { return compareBaseLargeVariableWidthVectors(range); } + @Override + public Boolean visit(BaseVariableWidthViewVector left, Range range) { + if (!validate(left)) { + return false; + } + return compareBaseVariableWidthViewVectors(range); + } + @Override public Boolean visit(ListVector left, Range range) { if (!validate(left)) { @@ -444,6 +454,85 @@ protected boolean compareBaseLargeVariableWidthVectors(Range range) { return true; } + protected boolean compareBaseVariableWidthViewVectors(Range range) { + BaseVariableWidthViewVector leftVector = (BaseVariableWidthViewVector) left; + BaseVariableWidthViewVector rightVector = (BaseVariableWidthViewVector) right; + + final ArrowBuf leftViewBuffer = leftVector.getDataBuffer(); + final ArrowBuf rightViewBuffer = rightVector.getDataBuffer(); + + final int elementSize = BaseVariableWidthViewVector.ELEMENT_SIZE; + final int lengthWidth = BaseVariableWidthViewVector.LENGTH_WIDTH; + final int prefixWidth = BaseVariableWidthViewVector.PREFIX_WIDTH; + final int bufIndexWidth = BaseVariableWidthViewVector.BUF_INDEX_WIDTH; + + List leftDataBuffers = leftVector.getDataBuffers(); + List rightDataBuffers = rightVector.getDataBuffers(); + + for (int i = 0; i < range.getLength(); i++) { + int leftIndex = range.getLeftStart() + i; + int rightIndex = range.getRightStart() + i; + + boolean isNull = leftVector.isNull(leftIndex); + if (isNull != rightVector.isNull(rightIndex)) { + return false; + } + + if (isNull) { + continue; + } + + int startLeftByteOffset = leftIndex * elementSize; + + int startRightByteOffset = rightIndex * elementSize; + + int leftDataBufferValueLength = leftVector.getValueLength(leftIndex); + int rightDataBufferValueLength = rightVector.getValueLength(rightIndex); + + if (leftDataBufferValueLength != rightDataBufferValueLength) { + return false; + } + + if (leftDataBufferValueLength > BaseVariableWidthViewVector.INLINE_SIZE) { + // if the value is stored in the dataBuffers + int leftDataBufferIndex = leftViewBuffer.getInt(startLeftByteOffset + lengthWidth + prefixWidth); + int rightDataBufferIndex = rightViewBuffer.getInt(startRightByteOffset + lengthWidth + prefixWidth); + + final int leftDataOffset = + leftViewBuffer.getInt(startLeftByteOffset + lengthWidth + prefixWidth + bufIndexWidth); + final int rightDataOffset = + rightViewBuffer.getInt(startRightByteOffset + lengthWidth + prefixWidth + bufIndexWidth); + + ArrowBuf leftDataBuffer = leftDataBuffers.get(leftDataBufferIndex); + ArrowBuf rightDataBuffer = rightDataBuffers.get(rightDataBufferIndex); + + // check equality in the considered string stored in the dataBuffers + int retDataBuf = ByteFunctionHelpers.equal( + leftDataBuffer, leftDataOffset, leftDataOffset + leftDataBufferValueLength, + rightDataBuffer, rightDataOffset, rightDataOffset + rightDataBufferValueLength); + + if (retDataBuf == 0) { + return false; + } + } else { + // if the value is stored in the view + final int leftDataOffset = startLeftByteOffset + lengthWidth; + final int rightDataOffset = startRightByteOffset + lengthWidth; + + // check equality in the considered string stored in the view + int retDataBuf = ByteFunctionHelpers.equal( + leftViewBuffer, leftDataOffset, leftDataOffset + leftDataBufferValueLength, + rightViewBuffer, rightDataOffset, rightDataOffset + rightDataBufferValueLength); + + if (retDataBuf == 0) { + return false; + } + } + + } + return true; + } + protected boolean compareListVectors(Range range) { ListVector leftVector = 
(ListVector) left; ListVector rightVector = (ListVector) right; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java index 443ee1f96e273..aaef161a563be 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java @@ -23,6 +23,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.ValueVector; @@ -85,6 +86,11 @@ public Boolean visit(BaseLargeVariableWidthVector left, Void value) { return compareField(left.getField(), right.getField()); } + @Override + public Boolean visit(BaseVariableWidthViewVector left, Void value) { + return compareField(left.getField(), right.getField()); + } + @Override public Boolean visit(ListVector left, Void value) { return compareField(left.getField(), right.getField()); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java index aee090706b3c8..de88f25e6753d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java @@ -20,6 +20,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.complex.DenseUnionVector; @@ -42,6 +43,8 @@ public interface VectorVisitor { OUT visit(BaseLargeVariableWidthVector left, IN value); + OUT visit(BaseVariableWidthViewVector left, IN value); + OUT visit(ListVector left, IN value); OUT visit(FixedSizeListVector left, IN value); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index 7906d90c2fff0..7c4015299a6cd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -355,16 +355,8 @@ public int getInnerValueCountAt(int index) { offsetBuffer.getInt(index * OFFSET_WIDTH); } - /** Return if value at index is null (this implementation is always false). */ - @Override - public boolean isNull(int index) { - return false; - } - - /** Return if value at index is empty (this implementation is always false). */ - public boolean isEmpty(int index) { - return false; - } + /** Return if value at index is empty. */ + public abstract boolean isEmpty(int index); /** Starts a new repeated value. 
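The comparator added above reads the raw 16-byte views directly. A self-contained sketch of that decoding, using the same constants it references (ELEMENT_SIZE 16, LENGTH_WIDTH 4, PREFIX_WIDTH 4, BUF_INDEX_WIDTH 4, INLINE_SIZE 12); the helper name is illustrative only:

    import java.util.List;
    import org.apache.arrow.memory.ArrowBuf;

    final class ViewLayoutSketch {
      /** Describe where the value bytes for slot index live. */
      static String locate(ArrowBuf viewBuffer, List<ArrowBuf> dataBuffers, int index) {
        int start = index * 16;                // ELEMENT_SIZE
        int length = viewBuffer.getInt(start); // first LENGTH_WIDTH bytes of the view
        if (length > 12) {                     // INLINE_SIZE exceeded
          // long form: [length][4-byte prefix][data-buffer index][offset]
          int bufIndex = viewBuffer.getInt(start + 4 + 4);
          int offset = viewBuffer.getInt(start + 4 + 4 + 4);
          return "dataBuffers[" + bufIndex + "] bytes [" + offset + ", " + (offset + length) + ")";
        }
        // short form: [length][up to 12 inlined value bytes]
        return "inline: viewBuffer bytes [" + (start + 4) + ", " + (start + 4 + length) + ")";
      }
    }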
*/ public int startNewValue(int index) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java new file mode 100644 index 0000000000000..73a25738854f3 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.complex; + +import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; + +import java.util.Collections; +import java.util.Iterator; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.DensityAwareVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.UInt4Vector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.util.SchemaChangeRuntimeException; + +public abstract class BaseRepeatedValueViewVector extends BaseValueVector + implements RepeatedValueVector, BaseListVector { + + public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; + public static final String DATA_VECTOR_NAME = "$data$"; + + public static final byte OFFSET_WIDTH = 4; + public static final byte SIZE_WIDTH = 4; + protected ArrowBuf offsetBuffer; + protected ArrowBuf sizeBuffer; + protected FieldVector vector; + protected final CallBack repeatedCallBack; + protected int valueCount; + protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; + protected long sizeAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH; + private final String name; + + protected String defaultDataVectorName = DATA_VECTOR_NAME; + + protected BaseRepeatedValueViewVector(String name, BufferAllocator allocator, CallBack callBack) { + this(name, allocator, DEFAULT_DATA_VECTOR, callBack); + } + + protected BaseRepeatedValueViewVector( + String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) { + super(allocator); + this.name = name; + this.offsetBuffer = allocator.getEmpty(); + 
this.sizeBuffer = allocator.getEmpty(); + this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null"); + this.repeatedCallBack = callBack; + this.valueCount = 0; + } + + @Override + public String getName() { + return name; + } + + @Override + public boolean allocateNewSafe() { + boolean dataAlloc = false; + try { + allocateBuffers(); + dataAlloc = vector.allocateNewSafe(); + } catch (Exception e) { + clear(); + return false; + } finally { + if (!dataAlloc) { + clear(); + } + } + return dataAlloc; + } + + private void allocateBuffers() { + offsetBuffer = allocateBuffers(offsetAllocationSizeInBytes); + sizeBuffer = allocateBuffers(sizeAllocationSizeInBytes); + } + + private ArrowBuf allocateBuffers(final long size) { + final int curSize = (int) size; + ArrowBuf buffer = allocator.buffer(curSize); + buffer.readerIndex(0); + buffer.setZero(0, buffer.capacity()); + return buffer; + } + + @Override + public void reAlloc() { + reallocateBuffers(); + vector.reAlloc(); + } + + protected void reallocateBuffers() { + reallocOffsetBuffer(); + reallocSizeBuffer(); + } + + private void reallocOffsetBuffer() { + final long currentBufferCapacity = offsetBuffer.capacity(); + long newAllocationSize = currentBufferCapacity * 2; + if (newAllocationSize == 0) { + if (offsetAllocationSizeInBytes > 0) { + newAllocationSize = offsetAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; + } + } + + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.min(newAllocationSize, (long) OFFSET_WIDTH * Integer.MAX_VALUE); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + offsetBuffer.getReferenceManager().release(1); + offsetBuffer = newBuf; + offsetAllocationSizeInBytes = newAllocationSize; + } + + private void reallocSizeBuffer() { + final long currentBufferCapacity = sizeBuffer.capacity(); + long newAllocationSize = currentBufferCapacity * 2; + if (newAllocationSize == 0) { + if (sizeAllocationSizeInBytes > 0) { + newAllocationSize = sizeAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH * 2; + } + } + + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.min(newAllocationSize, (long) SIZE_WIDTH * Integer.MAX_VALUE); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= sizeBuffer.capacity()) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, sizeBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + sizeBuffer.getReferenceManager().release(1); + sizeBuffer = newBuf; + sizeAllocationSizeInBytes = newAllocationSize; + } + + @Override + public FieldVector getDataVector() { + return vector; + } + + @Override + public void setInitialCapacity(int numRecords) { + offsetAllocationSizeInBytes = (numRecords) * OFFSET_WIDTH; + sizeAllocationSizeInBytes = (numRecords) * SIZE_WIDTH; + if (vector instanceof BaseFixedWidthVector || vector instanceof 
BaseVariableWidthVector) { + vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + } else { + vector.setInitialCapacity(numRecords); + } + } + + @Override + public void setInitialCapacity(int numRecords, double density) { + if ((numRecords * density) >= Integer.MAX_VALUE) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); + } + + offsetAllocationSizeInBytes = numRecords * OFFSET_WIDTH; + sizeAllocationSizeInBytes = numRecords * SIZE_WIDTH; + + int innerValueCapacity = Math.max((int) (numRecords * density), 1); + + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density); + } else { + vector.setInitialCapacity(innerValueCapacity); + } + } + + /** + * Specialized version of setInitialTotalCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. + * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param totalNumberOfElements the total number of elements to allow + * for in this vector across all records. + */ + public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { + offsetAllocationSizeInBytes = numRecords * OFFSET_WIDTH; + sizeAllocationSizeInBytes = numRecords * SIZE_WIDTH; + vector.setInitialCapacity(totalNumberOfElements); + } + + @Override + public int getValueCapacity() { + throw new UnsupportedOperationException( + "Get value capacity is not supported in RepeatedValueVector"); + } + + protected int getOffsetBufferValueCapacity() { + return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH); + } + + protected int getSizeBufferValueCapacity() { + return capAtMaxInt(sizeBuffer.capacity() / SIZE_WIDTH); + } + + @Override + public int getBufferSize() { + if (valueCount == 0) { + return 0; + } + return (valueCount * OFFSET_WIDTH) + (valueCount * SIZE_WIDTH) + vector.getBufferSize(); + } + + @Override + public int getBufferSizeFor(int valueCount) { + if (valueCount == 0) { + return 0; + } + + int innerVectorValueCount = 0; + + for (int i = 0; i < valueCount; i++) { + innerVectorValueCount += sizeBuffer.getInt(i * SIZE_WIDTH); + } + + return (valueCount * OFFSET_WIDTH) + (valueCount * SIZE_WIDTH) + + vector.getBufferSizeFor(innerVectorValueCount); + } + + @Override + public Iterator iterator() { + return Collections.singleton(getDataVector()).iterator(); + } + + @Override + public void clear() { + offsetBuffer = releaseBuffer(offsetBuffer); + sizeBuffer = releaseBuffer(sizeBuffer); + vector.clear(); + valueCount = 0; + super.clear(); + } + + @Override + public void reset() { + offsetBuffer.setZero(0, offsetBuffer.capacity()); + sizeBuffer.setZero(0, sizeBuffer.capacity()); + vector.reset(); + valueCount = 0; + } + + @Override + public ArrowBuf[] getBuffers(boolean clear) { + return new ArrowBuf[0]; + } + + @Override + public int getValueCount() { + return valueCount; + } + + @Override + public void setValueCount(int valueCount) { + 
this.valueCount = valueCount; + while (valueCount > getOffsetBufferValueCapacity()) { + reallocateBuffers(); + } + final int childValueCount = valueCount == 0 ? 0 : getLengthOfChildVector(); + vector.setValueCount(childValueCount); + } + + protected int getLengthOfChildVector() { + int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); + int minOffset = offsetBuffer.getInt(0); + for (int i = 0; i < valueCount; i++) { + int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); + int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH); + int currentSum = currentOffset + currentSize; + + maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); + minOffset = Math.min(minOffset, currentOffset); + } + + return maxOffsetSizeSum - minOffset; + } + + protected int getLengthOfChildVectorByIndex(int index) { + int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); + int minOffset = offsetBuffer.getInt(0); + for (int i = 0; i < index; i++) { + int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); + int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH); + int currentSum = currentOffset + currentSize; + + maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); + minOffset = Math.min(minOffset, currentOffset); + } + + return maxOffsetSizeSum - minOffset; + } + + /** + * Initialize the data vector (and execute callback) if it hasn't already been done, + * returns the data vector. + */ + public AddOrGetResult addOrGetVector(FieldType fieldType) { + boolean created = false; + if (vector instanceof NullVector) { + vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, repeatedCallBack); + // returned vector must have the same field + created = true; + if (repeatedCallBack != null && + // not a schema change if changing from ZeroVector to ZeroVector + (fieldType.getType().getTypeID() != ArrowType.ArrowTypeID.Null)) { + repeatedCallBack.doWork(); + } + } + + if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { + final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]", + fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); + throw new SchemaChangeRuntimeException(msg); + } + + return new AddOrGetResult<>((T) vector, created); + } + + protected void replaceDataVector(FieldVector v) { + vector.clear(); + vector = v; + } + + public abstract boolean isEmpty(int index); + + /** + * Start a new value at the given index. 
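Because list-view offsets need not be monotonic and child ranges may overlap, getLengthOfChildVector() above computes max(offset + size) - min(offset) rather than simply reading the last offset. A worked sketch of the same arithmetic:

    // offsets {4, 0, 4}, sizes {2, 4, 3}:
    //   offset + size -> {6, 4, 7}; max sum = 7, min offset = 0,
    //   so the child vector must cover [0, 7): length 7.
    static int childLength(int[] offsets, int[] sizes, int valueCount) {
      int maxOffsetSizeSum = offsets[0] + sizes[0];
      int minOffset = offsets[0];
      for (int i = 0; i < valueCount; i++) {
        maxOffsetSizeSum = Math.max(maxOffsetSizeSum, offsets[i] + sizes[i]);
        minOffset = Math.min(minOffset, offsets[i]);
      }
      return maxOffsetSizeSum - minOffset;
    }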
+ * @param index the index to start the new value at + * @return the offset in the data vector where the new value starts + */ + public int startNewValue(int index) { + while (index >= getOffsetBufferValueCapacity()) { + reallocOffsetBuffer(); + } + while (index >= getSizeBufferValueCapacity()) { + reallocSizeBuffer(); + } + + if (index > 0) { + final int prevOffset = getLengthOfChildVectorByIndex(index); + offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset); + } + + setValueCount(index + 1); + return offsetBuffer.getInt(index * OFFSET_WIDTH); + } + + @Override + @Deprecated + public UInt4Vector getOffsetVector() { + throw new UnsupportedOperationException("There is no inner offset vector"); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java new file mode 100644 index 0000000000000..b19691e7aaab7 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -0,0 +1,872 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.vector.complex; + +import static java.util.Collections.singletonList; +import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +import static org.apache.arrow.util.Preconditions.checkArgument; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.JsonStringArrayList; +import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.util.TransferPair; + +/** + * A list view vector contains lists of a specific type of elements. + * Its structure contains four elements. + *

+ * <ol> + * <li>A validity buffer.</li> + * <li>An offset buffer, that denotes lists starts.</li> + * <li>A size buffer, that denotes lists ends.</li> + * <li>A child data vector that contains the elements of lists.</li> + * </ol>
    + * The latter three are managed by its superclass. + */ + +/* +* TODO: consider merging the functionality in `BaseRepeatedValueVector` into this class. +*/ +public class ListViewVector extends BaseRepeatedValueViewVector implements PromotableVector { + + protected ArrowBuf validityBuffer; + protected UnionListReader reader; + private CallBack callBack; + protected Field field; + protected int validityAllocationSizeInBytes; + + public static ListViewVector empty(String name, BufferAllocator allocator) { + return new ListViewVector(name, allocator, FieldType.nullable(ArrowType.ListView.INSTANCE), null); + } + + /** + * Constructs a new instance. + * + * @param name The name of the instance. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param fieldType The type of this list. + * @param callBack A schema change callback. + */ + public ListViewVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { + this(new Field(name, fieldType, null), allocator, callBack); + } + + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param callBack A schema change callback. + */ + public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) { + super(field.getName(), allocator, callBack); + this.validityBuffer = allocator.getEmpty(); + this.field = field; + this.callBack = callBack; + this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); + } + + @Override + public void initializeChildrenFromFields(List children) { + checkArgument(children.size() == 1, + "ListViews have one child Field. Found: %s", children.isEmpty() ? "none" : children); + + Field field = children.get(0); + AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); + checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); + + addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); + this.field = new Field(this.field.getName(), this.field.getFieldType(), children); + } + + @Override + public void setInitialCapacity(int numRecords) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords); + } + + /** + * Specialized version of setInitialCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. + * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param density density of ListViewVector. + * Density is the average size of a list per position in the ListViewVector. + * For example, a + * density value of 10 implies each position in the list + * vector has a list of 10 values. + * A density value of 0.1 implies out of 10 positions in + * the list vector, 1 position has a list of size 1, and + * the remaining positions are null (no lists) or empty lists. 
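A hedged population sketch for this class: a getWriter() accessor returning the imported UnionListViewWriter is assumed (it is not shown in this excerpt), and the writer methods are assumed to mirror ListVector's UnionListWriter:

    try (BufferAllocator allocator = new RootAllocator();
         ListViewVector lv = ListViewVector.empty("lv", allocator)) {
      UnionListViewWriter writer = lv.getWriter(); // assumed accessor
      writer.allocate();
      writer.startList();
      writer.integer().writeInt(1);
      writer.integer().writeInt(2);
      writer.endList();
      lv.setValueCount(1); // lv now holds [[1, 2]]
    }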
+
+  /**
+   * Specialized version of setInitialCapacity() for ListViewVector.
+   * This is used by some callers when they want to explicitly control and be
+   * conservative about memory allocated for the inner data vector.
+   * This is very useful when we are working with memory constraints for a query
+   * and have a fixed amount of memory reserved for the record batch.
+   * In such cases, a plain setInitialCapacity(x) applies a default multiplier to
+   * the inner data vector, which can push the memory requirement beyond what is
+   * actually needed and lead to OOM or related problems.
+   *
+   * @param numRecords value count
+   * @param density density of ListViewVector.
+   *                Density is the average size of a list per position in the ListViewVector.
+   *                For example, a density value of 10 implies each position in the list
+   *                vector has a list of 10 values.
+   *                A density value of 0.1 implies out of 10 positions in
+   *                the list vector, 1 position has a list of size 1, and
+   *                the remaining positions are null (no lists) or empty lists.
+   *                This helps in tightly controlling the memory we provision
+   *                for the inner data vector.
+   */
+  @Override
+  public void setInitialCapacity(int numRecords, double density) {
+    validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+    super.setInitialCapacity(numRecords, density);
+  }
+
+  /**
+   * Specialized version of setInitialTotalCapacity() for ListViewVector.
+   * This is used by some callers when they want to explicitly control and be
+   * conservative about memory allocated for the inner data vector.
+   * This is very useful when we are working with memory constraints for a query
+   * and have a fixed amount of memory reserved for the record batch.
+   * In such cases, a plain setInitialCapacity(x) applies a default multiplier to
+   * the inner data vector, which can push the memory requirement beyond what is
+   * actually needed and lead to OOM or related problems.
+   *
+   * @param numRecords value count
+   * @param totalNumberOfElements the total number of elements to allow
+   *                              for in this vector across all records.
+   */
+  @Override
+  public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) {
+    validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+    super.setInitialTotalCapacity(numRecords, totalNumberOfElements);
+  }
+
+  @Override
+  public List<FieldVector> getChildrenFromFields() {
+    return singletonList(getDataVector());
+  }
+
+  /**
+   * Load the buffers associated with this Field.
+   *
+   * @param fieldNode the fieldNode
+   * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+   */
+  @Override
+  public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+    if (ownBuffers.size() != 3) {
+      throw new IllegalArgumentException("Illegal buffer count, expected " +
+          3 + ", got: " + ownBuffers.size());
+    }
+
+    ArrowBuf bitBuffer = ownBuffers.get(0);
+    ArrowBuf offBuffer = ownBuffers.get(1);
+    ArrowBuf szBuffer = ownBuffers.get(2);
+
+    validityBuffer.getReferenceManager().release();
+    validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+    offsetBuffer.getReferenceManager().release();
+    offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator);
+    sizeBuffer.getReferenceManager().release();
+    sizeBuffer = szBuffer.getReferenceManager().retain(szBuffer, allocator);
+
+    validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
+    offsetAllocationSizeInBytes = offsetBuffer.capacity();
+    sizeAllocationSizeInBytes = sizeBuffer.capacity();
+
+    valueCount = fieldNode.getLength();
+  }
+
+  /**
+   * Set the reader and writer indexes for the inner buffers.
+   */
+  private void setReaderAndWriterIndex() {
+    validityBuffer.readerIndex(0);
+    offsetBuffer.readerIndex(0);
+    sizeBuffer.readerIndex(0);
+    if (valueCount == 0) {
+      validityBuffer.writerIndex(0);
+      offsetBuffer.writerIndex(0);
+      sizeBuffer.writerIndex(0);
+    } else {
+      validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+      offsetBuffer.writerIndex(valueCount * OFFSET_WIDTH);
+      sizeBuffer.writerIndex(valueCount * SIZE_WIDTH);
+    }
+  }
+
+  @Override
+  public List<ArrowBuf> getFieldBuffers() {
+    List<ArrowBuf> result = new ArrayList<>(3);
+    setReaderAndWriterIndex();
+    result.add(validityBuffer);
+    result.add(offsetBuffer);
+    result.add(sizeBuffer);
+
+    return result;
+  }
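A brief usage sketch of the own-buffer contract above (the populated vector name is assumed for illustration): a ListViewVector exposes exactly three of its own buffers, always in validity/offset/size order, matching what loadFieldBuffers expects back.

    List<ArrowBuf> own = listViewVector.getFieldBuffers();
    ArrowBuf validity = own.get(0); // 1 bit per element
    ArrowBuf offsets = own.get(1);  // OFFSET_WIDTH (4) bytes per element
    ArrowBuf sizes = own.get(2);    // SIZE_WIDTH (4) bytes per element
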
+
+  /**
+   * Export the buffers of the fields for C Data Interface.
+   * This method traverses the buffers and exports each buffer and its memory
+   * address into a list of buffers and a pointer to that list of buffers.
+   */
+  @Override
+  public void exportCDataBuffers(List<ArrowBuf> buffers, ArrowBuf buffersPtr, long nullValue) {
+    throw new UnsupportedOperationException("exportCDataBuffers Not implemented yet");
+  }
+
+  @Override
+  public void allocateNew() throws OutOfMemoryException {
+    if (!allocateNewSafe()) {
+      throw new OutOfMemoryException("Failure while allocating memory");
+    }
+  }
+
+  @Override
+  public boolean allocateNewSafe() {
+    boolean success = false;
+    try {
+      /* Release the current buffers, hence this is a new allocation.
+       * Note that the `clear` call below releases the validityBuffer and,
+       * via the superclass clear method, the associated buffers
+       * (sizeBuffer and offsetBuffer).
+       */
+      clear();
+      /* allocate validity buffer */
+      allocateValidityBuffer(validityAllocationSizeInBytes);
+      /* allocate offset, data and sizes buffer */
+      success = super.allocateNewSafe();
+    } finally {
+      if (!success) {
+        clear();
+      }
+    }
+    return success;
+  }
+
+  protected void allocateValidityBuffer(final long size) {
+    final int curSize = (int) size;
+    validityBuffer = allocator.buffer(curSize);
+    validityBuffer.readerIndex(0);
+    validityAllocationSizeInBytes = curSize;
+    validityBuffer.setZero(0, validityBuffer.capacity());
+  }
+
+  @Override
+  public void reAlloc() {
+    /* reallocate the validity buffer */
+    reallocValidityBuffer();
+    /* reallocate the offset, size, and data */
+    super.reAlloc();
+  }
+
+  protected void reallocValidityAndSizeAndOffsetBuffers() {
+    reallocateBuffers();
+    reallocValidityBuffer();
+  }
+
+  private void reallocValidityBuffer() {
+    final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
+    long newAllocationSize = getNewAllocationSize(currentBufferCapacity);
+
+    final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
+    newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+    newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+    validityBuffer.getReferenceManager().release(1);
+    validityBuffer = newBuf;
+    validityAllocationSizeInBytes = (int) newAllocationSize;
+  }
+
+  private long getNewAllocationSize(int currentBufferCapacity) {
+    long newAllocationSize = currentBufferCapacity * 2L;
+    if (newAllocationSize == 0) {
+      if (validityAllocationSizeInBytes > 0) {
+        newAllocationSize = validityAllocationSizeInBytes;
+      } else {
+        newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2L;
+      }
+    }
+    newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+    assert newAllocationSize >= 1;
+
+    if (newAllocationSize > MAX_ALLOCATION_SIZE) {
+      throw new OversizedAllocationException("Unable to expand the buffer");
+    }
+    return newAllocationSize;
+  }
+
+  @Override
+  public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+    // TODO: https://github.com/apache/arrow/issues/41270
+    throw new UnsupportedOperationException(
+        "ListViewVector does not support copyFromSafe operation yet.");
+  }
+
+  @Override
+  public void copyFrom(int inIndex, int outIndex, ValueVector from) {
+    // TODO: https://github.com/apache/arrow/issues/41270
+    throw new UnsupportedOperationException(
+        "ListViewVector does not support copyFrom operation yet.");
+  }
+
+  @Override
+  public FieldVector getDataVector() {
+    return vector;
+  }
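As a minimal standalone sketch of the growth policy implemented by getNewAllocationSize above (not part of the patch; the default fallback constant here is illustrative): double the current capacity, fall back to the configured initial size when starting from zero, round up to a power of two, and refuse to grow past the cap.

    static long nextValidityAllocation(long currentCapacity, long initialSize, long maxSize) {
      long size = currentCapacity * 2;                 // double the current capacity
      if (size == 0) {
        size = (initialSize > 0) ? initialSize : 8;    // fall back to a configured or default size
      }
      long p = 1;                                      // round up to the next power of two
      while (p < size) {
        p <<= 1;
      }
      if (p > maxSize) {
        throw new IllegalStateException("Unable to expand the buffer");
      }
      return p;
    }
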
+
+  @Override
+  public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+    return getTransferPair(ref, allocator, null);
+  }
+
+  @Override
+  public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
+    return getTransferPair(field, allocator, null);
+  }
+
+  @Override
+  public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+    // TODO: https://github.com/apache/arrow/issues/41269
+    throw new UnsupportedOperationException(
+        "ListViewVector does not support getTransferPair(String, BufferAllocator, CallBack) yet");
+  }
+
+  @Override
+  public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) {
+    // TODO: https://github.com/apache/arrow/issues/41269
+    throw new UnsupportedOperationException(
+        "ListViewVector does not support getTransferPair(Field, BufferAllocator, CallBack) yet");
+  }
+
+  @Override
+  public TransferPair makeTransferPair(ValueVector target) {
+    // TODO: https://github.com/apache/arrow/issues/41269
+    throw new UnsupportedOperationException(
+        "ListViewVector does not support makeTransferPair(ValueVector) yet");
+  }
+
+  @Override
+  public long getValidityBufferAddress() {
+    return validityBuffer.memoryAddress();
+  }
+
+  @Override
+  public long getDataBufferAddress() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public long getOffsetBufferAddress() {
+    return offsetBuffer.memoryAddress();
+  }
+
+  @Override
+  public ArrowBuf getValidityBuffer() {
+    return validityBuffer;
+  }
+
+  @Override
+  public ArrowBuf getDataBuffer() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public ArrowBuf getOffsetBuffer() {
+    return offsetBuffer;
+  }
+
+  public ArrowBuf getSizeBuffer() {
+    return sizeBuffer;
+  }
+
+  public long getSizeBufferAddress() {
+    return sizeBuffer.memoryAddress();
+  }
+
+  /**
+   * Get the hash code for the element at the given index.
+   *
+   * @param index position of the element
+   * @return hash code for the element at the given index
+   */
+  @Override
+  public int hashCode(int index) {
+    return hashCode(index, null);
+  }
+
+  /**
+   * Get the hash code for the element at the given index.
+   *
+   * @param index position of the element
+   * @param hasher hasher to use
+   * @return hash code for the element at the given index
+   */
+  @Override
+  public int hashCode(int index, ArrowBufHasher hasher) {
+    if (isSet(index) == 0) {
+      return ArrowBufPointer.NULL_HASH_CODE;
+    }
+    int hash = 0;
+    final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
+    final int end = start + sizeBuffer.getInt(index * SIZE_WIDTH);
+    for (int i = start; i < end; i++) {
+      hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher));
+    }
+    return hash;
+  }
+
+  @Override
+  public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  protected FieldReader getReaderImpl() {
+    // TODO: https://github.com/apache/arrow/issues/41569
+    throw new UnsupportedOperationException(
+        "ListViewVector does not support getReaderImpl operation yet.");
+  }
+
+  @Override
+  public UnionListReader getReader() {
+    // TODO: https://github.com/apache/arrow/issues/41569
+    throw new UnsupportedOperationException(
+        "ListViewVector does not support getReader operation yet.");
+  }
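A hedged worked example of the per-element bookkeeping used by the buffer-size methods that follow (the value count is invented for illustration): for valueCount = 100, the vector's own buffers occupy

    // validity: getValidityBufferSizeFromCount(100) = ceil(100 / 8) = 13 bytes
    // offsets:  100 * OFFSET_WIDTH (4)              = 400 bytes
    // sizes:    100 * SIZE_WIDTH (4)                = 400 bytes
    // own-buffer total                              = 813 bytes,
    // plus whatever the child data vector reports from its own getBufferSize()
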
+
+  /**
+   * Get the size (number of bytes) of the underlying buffers used by this
+   * vector.
+   *
+   * @return size of underlying buffers.
+   */
+  @Override
+  public int getBufferSize() {
+    if (valueCount == 0) {
+      return 0;
+    }
+    final int offsetBufferSize = valueCount * OFFSET_WIDTH;
+    final int sizeBufferSize = valueCount * SIZE_WIDTH;
+    final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+    return offsetBufferSize + sizeBufferSize + validityBufferSize + vector.getBufferSize();
+  }
+
+  /**
+   * Get the size (number of bytes) of the underlying buffers used by this vector.
+   *
+   * @param valueCount the number of values to assume this vector contains
+   * @return size of underlying buffers.
+   */
+  @Override
+  public int getBufferSizeFor(int valueCount) {
+    if (valueCount == 0) {
+      return 0;
+    }
+    final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+
+    return super.getBufferSizeFor(valueCount) + validityBufferSize;
+  }
+
+  /**
+   * Get the field associated with the list view vector.
+   *
+   * @return the field
+   */
+  @Override
+  public Field getField() {
+    if (field.getChildren().contains(getDataVector().getField())) {
+      return field;
+    }
+    field = new Field(field.getName(), field.getFieldType(),
+        Collections.singletonList(getDataVector().getField()));
+    return field;
+  }
+
+  /**
+   * Get the minor type for the vector.
+   *
+   * @return the minor type
+   */
+  @Override
+  public MinorType getMinorType() {
+    return MinorType.LISTVIEW;
+  }
+
+  /**
+   * Clear the vector data.
+   */
+  @Override
+  public void clear() {
+    // calling the superclass clear method, which releases the sizeBuffer and offsetBuffer
+    super.clear();
+    validityBuffer = releaseBuffer(validityBuffer);
+  }
+
+  /**
+   * Reset the vector to its initial state by zeroing the validity buffer.
+   */
+  @Override
+  public void reset() {
+    super.reset();
+    validityBuffer.setZero(0, validityBuffer.capacity());
+  }
+
+  /**
+   * Return the underlying buffers associated with this vector. Note that this doesn't
+   * impact the reference counts for this buffer, so it only should be used for in-context
+   * access. Also note that this buffer changes regularly, thus
+   * external classes shouldn't hold a reference to it (unless they change it).
+   *
+   * @param clear Whether to clear vector before returning, the buffers will still be refcounted
+   *              but the returned array will be the only reference to them
+   * @return The underlying {@link ArrowBuf buffers} that is used by this
+   *         vector instance.
+   */
+  @Override
+  public ArrowBuf[] getBuffers(boolean clear) {
+    setReaderAndWriterIndex();
+    final ArrowBuf[] buffers;
+    if (getBufferSize() == 0) {
+      buffers = new ArrowBuf[0];
+    } else {
+      List<ArrowBuf> list = new ArrayList<>();
+      // the order must be validity, offset and size buffers
+      list.add(validityBuffer);
+      list.add(offsetBuffer);
+      list.add(sizeBuffer);
+      list.addAll(Arrays.asList(vector.getBuffers(false)));
+      buffers = list.toArray(new ArrowBuf[list.size()]);
+    }
+    if (clear) {
+      for (ArrowBuf buffer : buffers) {
+        buffer.getReferenceManager().retain();
+      }
+      clear();
+    }
+    return buffers;
+  }
+
+  /**
+   * Get the element in the list view vector at a particular index.
+   *
+   * @param index position of the element
+   * @return Object at given position
+   */
+  @Override
+  public List<?> getObject(int index) {
+    if (isSet(index) == 0) {
+      return null;
+    }
+    final List<Object> vals = new JsonStringArrayList<>();
+    final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
+    final int end = start + sizeBuffer.getInt(index * SIZE_WIDTH);
+    final ValueVector vv = getDataVector();
+    for (int i = start; i < end; i++) {
+      vals.add(vv.getObject(i));
+    }
+
+    return vals;
+  }
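A short read-path sketch (the populated vector is assumed for illustration): getObject follows offset[index] and size[index] into the child vector, while a cleared validity bit yields null instead of a list.

    List<?> element = listViewVector.getObject(4);   // e.g. [50, 12] for the layout example earlier
    boolean emptyOrNull = listViewVector.isEmpty(4); // true only for null or size-0 elements
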
+
+  /**
+   * Check if an element at given index is null.
+   *
+   * @param index position of an element
+   * @return true if an element at given index is null, false otherwise
+   */
+  @Override
+  public boolean isNull(int index) {
+    return (isSet(index) == 0);
+  }
+
+  /**
+   * Check if an element at given index is an empty list.
+   *
+   * @param index position of an element
+   * @return true if an element at given index is an empty list or NULL, false otherwise
+   */
+  @Override
+  public boolean isEmpty(int index) {
+    if (isNull(index)) {
+      return true;
+    } else {
+      return sizeBuffer.getInt(index * SIZE_WIDTH) == 0;
+    }
+  }
+
+  /**
+   * Check whether the element at the given index is set, i.e. the complement of
+   * {@link #isNull(int)}.
+   *
+   * @param index position of the element
+   * @return 1 if element at given index is not null, 0 otherwise
+   */
+  public int isSet(int index) {
+    final int byteIndex = index >> 3;
+    final byte b = validityBuffer.getByte(byteIndex);
+    final int bitIndex = index & 7;
+    return (b >> bitIndex) & 0x01;
+  }
+
+  /**
+   * Get the number of elements that are null in the vector.
+   *
+   * @return the number of null elements.
+   */
+  @Override
+  public int getNullCount() {
+    return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+  }
+
+  /**
+   * Get the value capacity by considering validity and offset capacity.
+   * Note that the size buffer capacity is not considered here since it has
+   * the same capacity as the offset buffer.
+   *
+   * @return the value capacity
+   */
+  @Override
+  public int getValueCapacity() {
+    return getValidityAndOffsetValueCapacity();
+  }
+
+  private int getValidityAndSizeValueCapacity() {
+    final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0);
+    final int sizeValueCapacity = Math.max(getSizeBufferValueCapacity(), 0);
+    return Math.min(offsetValueCapacity, sizeValueCapacity);
+  }
+
+  private int getValidityAndOffsetValueCapacity() {
+    final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0);
+    return Math.min(offsetValueCapacity, getValidityBufferValueCapacity());
+  }
+
+  private int getValidityBufferValueCapacity() {
+    return capAtMaxInt(validityBuffer.capacity() * 8);
+  }
+
+  /**
+   * Set the element at the given index to null.
+   *
+   * @param index the index of the element to set to null
+   */
+  @Override
+  public void setNull(int index) {
+    while (index >= getValidityAndSizeValueCapacity()) {
+      reallocValidityAndSizeAndOffsetBuffers();
+    }
+
+    offsetBuffer.setInt(index * OFFSET_WIDTH, 0);
+    sizeBuffer.setInt(index * SIZE_WIDTH, 0);
+    BitVectorHelper.unsetBit(validityBuffer, index);
+  }
+
+  /**
+   * Start a new value in the ListView vector.
+   *
+   * @param index index of the value to start
+   * @return offset of the new value
+   */
+  @Override
+  public int startNewValue(int index) {
+    while (index >= getValidityAndSizeValueCapacity()) {
+      reallocValidityAndSizeAndOffsetBuffers();
+    }
+
+    if (index > 0) {
+      final int prevOffset = getLengthOfChildVectorByIndex(index);
+      offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset);
+    }
+
+    BitVectorHelper.setBit(validityBuffer, index);
+    return offsetBuffer.getInt(index * OFFSET_WIDTH);
+  }
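A worked example of the bit arithmetic in isSet above (index value invented): for index = 21, byteIndex = 21 >> 3 = 2 and bitIndex = 21 & 7 = 5, so the call reads byte 2 of the validity buffer and extracts bit 5.

    // (b >> 5) & 0x01 == 1 -> element 21 is set (non-null)
    // (b >> 5) & 0x01 == 0 -> element 21 is null
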
+
+  /**
+   * Validate the invariants of the offset and size buffers:
+   * 0 <= offsets[i] <= length of the child array
+   * 0 <= offsets[i] + size[i] <= length of the child array
+   *
+   * @param offset the offset at a given index
+   * @param size the size at a given index
+   */
+  private void validateInvariants(int offset, int size) {
+    if (offset < 0) {
+      throw new IllegalArgumentException("Offset cannot be negative");
+    }
+
+    if (size < 0) {
+      throw new IllegalArgumentException("Size cannot be negative");
+    }
+
+    // 0 <= offsets[i] <= length of the child array
+    if (offset > this.vector.getValueCount()) {
+      throw new IllegalArgumentException("Offset is out of bounds.");
+    }
+
+    // 0 <= offsets[i] + size[i] <= length of the child array
+    if (offset + size > this.vector.getValueCount()) {
+      throw new IllegalArgumentException("Offset + size exceeds the length of the child array.");
+    }
+  }
+
+  /**
+   * Set the offset at the given index.
+   * Make sure to use this function after updating the data vector and calling `setValidity`.
+   *
+   * @param index index of the value to set
+   * @param value value to set
+   */
+  public void setOffset(int index, int value) {
+    validateInvariants(value, sizeBuffer.getInt(index * SIZE_WIDTH));
+
+    offsetBuffer.setInt(index * OFFSET_WIDTH, value);
+  }
+
+  /**
+   * Set the size at the given index.
+   * Make sure to use this function after calling `setOffset`.
+   *
+   * @param index index of the value to set
+   * @param value value to set
+   */
+  public void setSize(int index, int value) {
+    validateInvariants(offsetBuffer.getInt(index * OFFSET_WIDTH), value);
+
+    sizeBuffer.setInt(index * SIZE_WIDTH, value);
+  }
+
+  /**
+   * Set the validity at the given index.
+   *
+   * @param index index of the value to set
+   * @param value value to set (0 for unset and 1 for set)
+   */
+  public void setValidity(int index, int value) {
+    if (value == 0) {
+      BitVectorHelper.unsetBit(validityBuffer, index);
+    } else {
+      BitVectorHelper.setBit(validityBuffer, index);
+    }
+  }
+
+  @Override
+  public void setValueCount(int valueCount) {
+    this.valueCount = valueCount;
+    if (valueCount > 0) {
+      while (valueCount > getValidityAndSizeValueCapacity()) {
+        /* check if validity and offset buffers need to be re-allocated */
+        reallocValidityAndSizeAndOffsetBuffers();
+      }
+    }
+    /* valueCount for the data vector is the current end offset */
+    final int childValueCount = (valueCount == 0) ? 0 : getLengthOfChildVector();
+    /* set the value count of the data vector; this will take care of
+     * checking whether the data buffer needs to be reallocated.
+     */
+    vector.setValueCount(childValueCount);
+  }
+
+  @Override
+  public int getElementStartIndex(int index) {
+    return offsetBuffer.getInt(index * OFFSET_WIDTH);
+  }
+
+  @Override
+  public int getElementEndIndex(int index) {
+    return getElementStartIndex(index) + sizeBuffer.getInt(index * SIZE_WIDTH);
+  }
+
+  @Override
+  public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) {
+    AddOrGetResult<T> result = super.addOrGetVector(fieldType);
+    invalidateReader();
+    return result;
+  }
+
+  @Override
+  public UnionVector promoteToUnion() {
+    UnionVector vector = new UnionVector("$data$", allocator, /* field type */ null, callBack);
+    replaceDataVector(vector);
+    invalidateReader();
+    if (callBack != null) {
+      callBack.doWork();
+    }
+    return vector;
+  }
+
+  private void invalidateReader() {
+    reader = null;
+  }
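Putting the setters above together, an end-to-end sketch of manually populating a ListViewVector (the allocator lifecycle and values are illustrative, and production code would typically go through UnionListViewWriter instead):

    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.BigIntVector;
    import org.apache.arrow.vector.complex.ListViewVector;
    import org.apache.arrow.vector.types.Types.MinorType;
    import org.apache.arrow.vector.types.pojo.FieldType;

    try (BufferAllocator allocator = new RootAllocator();
         ListViewVector listViewVector = ListViewVector.empty("v", allocator)) {
      listViewVector.allocateNew();

      // child (data) vector holding the flat elements [12, -7, 25, 0, -127]
      BigIntVector child = (BigIntVector) listViewVector
          .addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType()))
          .getVector();
      long[] values = {12, -7, 25, 0, -127};
      for (int i = 0; i < values.length; i++) {
        child.setSafe(i, values[i]);
      }
      child.setValueCount(values.length);

      // element 0 -> child[0..3), element 1 -> null, element 2 -> child[3..5)
      listViewVector.setValidity(0, 1);
      listViewVector.setOffset(0, 0);
      listViewVector.setSize(0, 3);
      listViewVector.setValidity(1, 0);
      listViewVector.setValidity(2, 1);
      listViewVector.setOffset(2, 3);
      listViewVector.setSize(2, 2);
      listViewVector.setValueCount(3);

      // getObject(0) -> [12, -7, 25]; getObject(1) -> null; getObject(2) -> [0, -127]
    }
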
Use getFieldBuffers"); + } + + public UnionListViewWriter getWriter() { + return new UnionListViewWriter(this); + } + + @Override + public int getValueCount() { + return valueCount; + } + + /** + * Get the density of this ListVector. + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final double totalListSize = getLengthOfChildVector(); + return totalListSize / valueCount; + } + + /** + * Validating ListViewVector creation based on the specification guideline. + */ + @Override + public void validate() { + for (int i = 0; i < valueCount; i++) { + final int offset = offsetBuffer.getInt(i * OFFSET_WIDTH); + final int size = sizeBuffer.getInt(i * SIZE_WIDTH); + validateInvariants(offset, size); + } + } + + /** + * End the current value. + * + * @param index index of the value to end + * @param size number of elements in the list that was written + */ + public void endValue(int index, int size) { + sizeBuffer.setInt(index * SIZE_WIDTH, size); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index 7f724829ef1eb..c59b997286d2d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -54,6 +55,7 @@ public class PromotableWriter extends AbstractPromotableFieldWriter { private final AbstractStructVector parentContainer; private final ListVector listVector; + private final ListViewVector listViewVector; private final FixedSizeListVector fixedListVector; private final LargeListVector largeListVector; private final NullableStructWriterFactory nullableStructWriterFactory; @@ -94,6 +96,7 @@ public PromotableWriter( NullableStructWriterFactory nullableStructWriterFactory) { this.parentContainer = parentContainer; this.listVector = null; + this.listViewVector = null; this.fixedListVector = null; this.largeListVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; @@ -142,6 +145,27 @@ public PromotableWriter( ListVector listVector, NullableStructWriterFactory nullableStructWriterFactory) { this.listVector = listVector; + this.listViewVector = null; + this.parentContainer = null; + this.fixedListVector = null; + this.largeListVector = null; + this.nullableStructWriterFactory = nullableStructWriterFactory; + init(v); + } + + /** + * Constructs a new instance. + * + * @param v The vector to initialize the writer with. + * @param listViewVector The vector that serves as a parent of v. + * @param nullableStructWriterFactory The factory to create the delegate writer. 
+ */ + public PromotableWriter( + ValueVector v, + ListViewVector listViewVector, + NullableStructWriterFactory nullableStructWriterFactory) { + this.listViewVector = listViewVector; + this.listVector = null; this.parentContainer = null; this.fixedListVector = null; this.largeListVector = null; @@ -163,6 +187,7 @@ public PromotableWriter( this.fixedListVector = fixedListVector; this.parentContainer = null; this.listVector = null; + this.listViewVector = null; this.largeListVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); @@ -183,6 +208,7 @@ public PromotableWriter( this.fixedListVector = null; this.parentContainer = null; this.listVector = null; + this.listViewVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); } @@ -280,6 +306,8 @@ protected FieldWriter getWriter(MinorType type, ArrowType arrowType) { v = listVector.addOrGetVector(fieldType).getVector(); } else if (fixedListVector != null) { v = fixedListVector.addOrGetVector(fieldType).getVector(); + } else if (listViewVector != null) { + v = listViewVector.addOrGetVector(fieldType).getVector(); } else { v = largeListVector.addOrGetVector(fieldType).getVector(); } @@ -322,6 +350,8 @@ private FieldWriter promoteToUnion() { unionVector = fixedListVector.promoteToUnion(); } else if (largeListVector != null) { unionVector = largeListVector.promoteToUnion(); + } else if (listViewVector != null) { + unionVector = listViewVector.promoteToUnion(); } unionVector.addVector((FieldVector) tp.getTo()); writer = new UnionWriter(unionVector, nullableStructWriterFactory); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java index e927acd4816ad..5c9efc445e0c4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java @@ -715,6 +715,7 @@ private ArrowBuf readIntoBuffer(BufferAllocator allocator, BufferType bufferType private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException { ArrowType type = field.getType(); + // TODO: https://github.com/apache/arrow/issues/41733 TypeLayout typeLayout = TypeLayout.getTypeLayout(type); List vectorTypes = typeLayout.getBufferTypes(); ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()]; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java index f5e267e81256c..670881b238ecb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java @@ -208,6 +208,7 @@ private void writeBatch(VectorSchemaRoot recordBatch) throws IOException { } private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException { + // TODO: https://github.com/apache/arrow/issues/41733 List vectorTypes = TypeLayout.getTypeLayout(field.getType()).getBufferTypes(); List vectorBuffers = vector.getFieldBuffers(); if (vectorTypes.size() != vectorBuffers.size()) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java index f81d049a9257f..b910cfc6ecc25 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java +++ 
b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java @@ -56,17 +56,19 @@ public class ArrowRecordBatch implements ArrowMessage { private final List buffersLayout; + private final List variadicBufferCounts; + private boolean closed = false; public ArrowRecordBatch( int length, List nodes, List buffers) { - this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, true); + this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, null, true); } public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression) { - this(length, nodes, buffers, bodyCompression, true); + this(length, nodes, buffers, bodyCompression, null, true); } /** @@ -81,7 +83,7 @@ public ArrowRecordBatch( public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression, boolean alignBuffers) { - this(length, nodes, buffers, bodyCompression, alignBuffers, /*retainBuffers*/ true); + this(length, nodes, buffers, bodyCompression, null, alignBuffers, /*retainBuffers*/ true); } /** @@ -98,12 +100,48 @@ public ArrowRecordBatch( public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression, boolean alignBuffers, boolean retainBuffers) { + this(length, nodes, buffers, bodyCompression, null, alignBuffers, retainBuffers); + } + + /** + * Construct a record batch from nodes. + * + * @param length how many rows in this batch + * @param nodes field level info + * @param buffers will be retained until this recordBatch is closed + * @param bodyCompression compression info. + * @param variadicBufferCounts the number of buffers in each variadic section. + * @param alignBuffers Whether to align buffers to an 8 byte boundary. + */ + public ArrowRecordBatch( + int length, List nodes, List buffers, + ArrowBodyCompression bodyCompression, List variadicBufferCounts, boolean alignBuffers) { + this(length, nodes, buffers, bodyCompression, variadicBufferCounts, alignBuffers, /*retainBuffers*/ true); + } + + /** + * Construct a record batch from nodes. + * + * @param length how many rows in this batch + * @param nodes field level info + * @param buffers will be retained until this recordBatch is closed + * @param bodyCompression compression info. + * @param variadicBufferCounts the number of buffers in each variadic section. + * @param alignBuffers Whether to align buffers to an 8 byte boundary. + * @param retainBuffers Whether to retain() each source buffer in the constructor. If false, the caller is + * responsible for retaining the buffers beforehand. + */ + public ArrowRecordBatch( + int length, List nodes, List buffers, + ArrowBodyCompression bodyCompression, List variadicBufferCounts, boolean alignBuffers, + boolean retainBuffers) { super(); this.length = length; this.nodes = nodes; this.buffers = buffers; Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); this.bodyCompression = bodyCompression; + this.variadicBufferCounts = variadicBufferCounts; List arrowBuffers = new ArrayList<>(buffers.size()); long offset = 0; for (ArrowBuf arrowBuf : buffers) { @@ -129,12 +167,14 @@ public ArrowRecordBatch( // to distinguish this from the public constructor. 
private ArrowRecordBatch( boolean dummy, int length, List nodes, - List buffers, ArrowBodyCompression bodyCompression) { + List buffers, ArrowBodyCompression bodyCompression, + List variadicBufferCounts) { this.length = length; this.nodes = nodes; this.buffers = buffers; Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); this.bodyCompression = bodyCompression; + this.variadicBufferCounts = variadicBufferCounts; this.closed = false; List arrowBuffers = new ArrayList<>(); long offset = 0; @@ -179,6 +219,14 @@ public List getBuffers() { return buffers; } + /** + * Get the record batch variadic buffer counts. + * @return the variadic buffer counts + */ + public List getVariadicBufferCounts() { + return variadicBufferCounts; + } + /** * Create a new ArrowRecordBatch which has the same information as this batch but whose buffers * are owned by that Allocator. @@ -195,7 +243,7 @@ public ArrowRecordBatch cloneWithTransfer(final BufferAllocator allocator) { .writerIndex(buf.writerIndex())) .collect(Collectors.toList()); close(); - return new ArrowRecordBatch(false, length, nodes, newBufs, bodyCompression); + return new ArrowRecordBatch(false, length, nodes, newBufs, bodyCompression, variadicBufferCounts); } /** @@ -217,6 +265,24 @@ public int writeTo(FlatBufferBuilder builder) { if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) { compressOffset = bodyCompression.writeTo(builder); } + + // Start the variadicBufferCounts vector. + int variadicBufferCountsOffset = 0; + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + variadicBufferCountsOffset = variadicBufferCounts.size(); + int elementSizeInBytes = 8; // Size of long in bytes + builder.startVector(elementSizeInBytes, variadicBufferCountsOffset, elementSizeInBytes); + + // Add each long to the builder. Note that elements should be added in reverse order. + for (int i = variadicBufferCounts.size() - 1; i >= 0; i--) { + long value = variadicBufferCounts.get(i); + builder.addLong(value); + } + + // End the vector. This returns an offset that you can use to refer to the vector. 
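+        // Hedged illustration (counts invented, not part of the change): a batch
+        // with a single Utf8View column whose values spill into two data buffers
+        // would serialize variadicBufferCounts = [2], telling readers how many
+        // buffers belong to that column's variadic section when reassembling.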
+ variadicBufferCountsOffset = builder.endVector(); + } + RecordBatch.startRecordBatch(builder); RecordBatch.addLength(builder, length); RecordBatch.addNodes(builder, nodesOffset); @@ -224,6 +290,12 @@ public int writeTo(FlatBufferBuilder builder) { if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) { RecordBatch.addCompression(builder, compressOffset); } + + // Add the variadicBufferCounts to the RecordBatch + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + RecordBatch.addVariadicBufferCounts(builder, variadicBufferCountsOffset); + } + return RecordBatch.endRecordBatch(builder); } @@ -247,8 +319,13 @@ public void close() { @Override public String toString() { + int variadicBufCount = 0; + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + variadicBufCount = variadicBufferCounts.size(); + } return "ArrowRecordBatch [length=" + length + ", nodes=" + nodes + ", #buffers=" + buffers.size() + - ", buffersLayout=" + buffersLayout + ", closed=" + closed + "]"; + ", #variadicBufferCounts=" + variadicBufCount + ", buffersLayout=" + buffersLayout + + ", closed=" + closed + "]"; } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java index 9deb42c498cbb..099103cd178f8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java @@ -701,7 +701,8 @@ public static MessageMetadataResult readMessage(ReadChannel in) throws IOExcepti throw new IOException( "Unexpected end of stream trying to read message."); } - messageBuffer.rewind(); + // see https://github.com/apache/arrow/issues/41717 for reason why we cast to java.nio.Buffer + ByteBuffer rewindBuffer = (ByteBuffer) ((java.nio.Buffer) messageBuffer).rewind(); // Load the message. 
Message message = Message.getRootAsMessage(messageBuffer); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 0b0e0d66a98f0..abed4d1ff0143 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -65,10 +65,13 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.ViewVarBinaryVector; +import org.apache.arrow.vector.ViewVarCharVector; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -114,10 +117,13 @@ import org.apache.arrow.vector.complex.impl.UnionWriter; import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.VarCharWriterImpl; +import org.apache.arrow.vector.complex.impl.ViewVarBinaryWriterImpl; +import org.apache.arrow.vector.complex.impl.ViewVarCharWriterImpl; import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor; import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.ArrowType.BinaryView; import org.apache.arrow.vector.types.pojo.ArrowType.Bool; import org.apache.arrow.vector.types.pojo.ArrowType.Date; import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; @@ -131,6 +137,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary; import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8; import org.apache.arrow.vector.types.pojo.ArrowType.List; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.ArrowType.Null; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; @@ -138,6 +145,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; +import org.apache.arrow.vector.types.pojo.ArrowType.Utf8View; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; @@ -504,6 +512,20 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new VarCharWriterImpl((VarCharVector) vector); } }, + VIEWVARCHAR(Utf8View.INSTANCE) { + @Override + public FieldVector getNewVector( + Field field, + BufferAllocator allocator, + CallBack schemaChangeCallback) { + return new ViewVarCharVector(field, allocator); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new ViewVarCharWriterImpl((ViewVarCharVector) vector); + } + }, LARGEVARCHAR(LargeUtf8.INSTANCE) { @Override public FieldVector getNewVector( @@ -546,6 +568,20 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new VarBinaryWriterImpl((VarBinaryVector) vector); } }, + VIEWVARBINARY(BinaryView.INSTANCE) { + @Override + 
public FieldVector getNewVector(
+        Field field,
+        BufferAllocator allocator,
+        CallBack schemaChangeCallback) {
+      return new ViewVarBinaryVector(field, allocator);
+    }
+
+    @Override
+    public FieldWriter getNewFieldWriter(ValueVector vector) {
+      return new ViewVarBinaryWriterImpl((ViewVarBinaryVector) vector);
+    }
+  },
   DECIMAL(null) {
     @Override
     public FieldVector getNewVector(
@@ -658,6 +694,20 @@ public FieldWriter getNewFieldWriter(ValueVector vector) {
       return new UnionListWriter((ListVector) vector);
     }
   },
+  LISTVIEW(ListView.INSTANCE) {
+    @Override
+    public FieldVector getNewVector(
+        Field field,
+        BufferAllocator allocator,
+        CallBack schemaChangeCallback) {
+      return new ListViewVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+    }
+
+    @Override
+    public FieldWriter getNewFieldWriter(ValueVector vector) {
+      return ((ListViewVector) vector).getWriter();
+    }
+  },
   LARGELIST(ArrowType.LargeList.INSTANCE) {
     @Override
     public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) {
@@ -923,6 +973,11 @@ public MinorType visit(Utf8 type) {
       return MinorType.VARCHAR;
     }
 
+    @Override
+    public MinorType visit(Utf8View type) {
+      return MinorType.VIEWVARCHAR;
+    }
+
     @Override
     public Types.MinorType visit(LargeUtf8 type) {
       return MinorType.LARGEVARCHAR;
@@ -933,6 +988,11 @@ public MinorType visit(Binary type) {
       return MinorType.VARBINARY;
     }
 
+    @Override
+    public MinorType visit(BinaryView type) {
+      return MinorType.VIEWVARBINARY;
+    }
+
     @Override
     public MinorType visit(LargeBinary type) {
       return MinorType.LARGEVARBINARY;
@@ -1020,6 +1080,11 @@ public MinorType visit(Duration type) {
       return MinorType.DURATION;
     }
 
+    @Override
+    public MinorType visit(ListView type) {
+      return MinorType.LISTVIEW;
+    }
+
     @Override
     public MinorType visit(ExtensionType type) {
       return MinorType.EXTENSIONTYPE;
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java
index 3af2c98374070..5f59933975133 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java
@@ -93,6 +93,10 @@ public static long divideBy8Ceil(long input) {
     return (input + 7) >>> (long) DIVIDE_BY_8_SHIFT_BITS;
   }
 
+  public static long roundUpToMultipleOf16(long num) {
+    return (num + 15) & 0xFFFFFFFFFFFFFFF0L;
+  }
+
   private DataSizeRoundingUtil() {
   }
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java
index d938cd833a41a..10a195e1e7b0a 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java
@@ -64,6 +64,13 @@ public void set(ArrowBuf srcBytes, long start, long len) {
     length = (int) len;
   }
 
+  @Override
+  public void set(byte[] srcBytes, long start, long len) {
+    setCapacity((int) len, false);
+    System.arraycopy(srcBytes, (int) start, bytes, 0, (int) len);
+    length = (int) len;
+  }
+
   @Override
   public boolean equals(Object o) {
     if (o == this) {
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
index 95e35ce6938c3..ea631c59ce2f2 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java @@ -235,9 +235,7 @@ public void set(Text other) { * @param len the number of bytes of the new string */ public void set(byte[] utf8, int start, int len) { - setCapacity(len, false); - System.arraycopy(utf8, start, bytes, 0, len); - this.length = len; + super.set(utf8, start, len); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java index 068717c7acbc7..def8ef96877ed 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -26,6 +26,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.ExtensionTypeVector; @@ -205,6 +206,11 @@ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) { return targetVector; } + @Override + public ValueVector visit(BaseVariableWidthViewVector left, Void value) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override public ValueVector visit(ListVector deltaVector, Void value) { Preconditions.checkArgument(typeVisitor.equals(deltaVector), diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java index d4abaa1945b94..af5a67049f722 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java @@ -23,6 +23,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.FieldVector; @@ -50,6 +51,7 @@ private void validateVectorCommon(ValueVector vector) { if (vector instanceof FieldVector) { FieldVector fieldVector = (FieldVector) vector; + // TODO: https://github.com/apache/arrow/issues/41734 int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType); validateOrThrow(fieldVector.getFieldBuffers().size() == typeBufferCount, "Expected %s buffers in vector of type %s, got %s.", @@ -133,6 +135,11 @@ public Void visit(BaseLargeVariableWidthVector vector, Void value) { return null; } + @Override + public Void visit(BaseVariableWidthViewVector vector, Void value) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override public Void visit(ListVector vector, Void value) { int valueCount = vector.getValueCount(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java index 6d33be7a0dbac..ddcb658c1a95d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java +++ 
b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java @@ -23,6 +23,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.ValueVector; @@ -103,6 +104,11 @@ public Void visit(BaseLargeVariableWidthVector vector, Void value) { return null; } + @Override + public Void visit(BaseVariableWidthViewVector vector, Void value) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override public Void visit(ListVector vector, Void value) { validateOffsetBuffer(vector, vector.getValueCount()); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java index 3d1c5a4f27f7c..bbdabdb1226ad 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java @@ -22,6 +22,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateDayVector; @@ -308,6 +309,11 @@ public Void visit(BaseLargeVariableWidthVector vector, Void value) { return null; } + @Override + public Void visit(BaseVariableWidthViewVector vector, Void value) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override public Void visit(ListVector vector, Void value) { validateVectorCommon(vector, ArrowType.List.class); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java index 7e99b1f90fb61..786a1142a2b0b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java @@ -23,6 +23,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.NullVector; @@ -98,6 +99,11 @@ public Void visit(BaseLargeVariableWidthVector left, Void value) { return null; } + @Override + public Void visit(BaseVariableWidthViewVector left, Void value) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override public Void visit(ListVector vector, Void value) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java b/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java index 8596399e7e08c..b65e6fd36c158 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java @@ 
-17,9 +17,9 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; @@ -28,7 +28,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.holders.NullableDecimalHolder; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java index 075a05c04b641..cebd70fcc5a71 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.stream.IntStream; @@ -29,22 +29,21 @@ import org.apache.arrow.memory.util.hash.MurmurHasher; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestBitVector { private static final String EMPTY_SCHEMA_PATH = ""; private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -124,8 +123,8 @@ public void testSplitAndTransfer() throws Exception { for (int i = 0; i < length; i++) { int actual = toVector.get(i); int expected = sourceVector.get(start + i); - assertEquals("different data values not expected --> sourceVector index: " + (start + i) + - " toVector index: " + i, expected, actual); + assertEquals(expected, actual, + "different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i); } } } @@ -167,8 +166,8 @@ public void testSplitAndTransfer1() throws Exception { for (int i = 0; i < length; i++) { int actual = toVector.get(i); int expected = sourceVector.get(start + i); - assertEquals("different data values not expected --> sourceVector index: " + (start + i) + - " toVector index: " + i, expected, actual); + assertEquals(expected, actual, + "different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i); } } } @@ -218,8 +217,8 @@ public void testSplitAndTransfer2() throws Exception { for (int i = 0; i < length; i++) { int actual = toVector.get(i); int expected = sourceVector.get(start + i); - assertEquals("different data values not expected --> sourceVector index: " + (start + i) + - " toVector index: " + i, expected, 
actual); + assertEquals(expected, actual, + "different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i); } } } @@ -241,9 +240,9 @@ public void testReallocAfterVectorTransfer1() { for (int i = 0; i < valueCapacity; i++) { if ((i & 1) == 1) { - assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i)); + assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -259,9 +258,9 @@ public void testReallocAfterVectorTransfer1() { for (int i = 0; i < valueCapacity * 2; i++) { if (((i & 1) == 1) || (i == valueCapacity)) { - assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i)); + assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -277,9 +276,9 @@ public void testReallocAfterVectorTransfer1() { for (int i = 0; i < valueCapacity * 4; i++) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) { - assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i)); + assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -297,12 +296,12 @@ public void testReallocAfterVectorTransfer1() { if (i <= valueCapacity * 4) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2) || (i == valueCapacity * 4)) { - assertEquals("unexpected cleared bit at index: " + i, 1, toVector.get(i)); + assertEquals(1, toVector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } @@ -325,9 +324,9 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < valueCapacity; i++) { if ((i & 1) == 1) { - assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i)); + assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -343,9 +342,9 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < valueCapacity * 2; i++) { if (((i & 1) == 1) || (i == valueCapacity)) { - assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i)); + assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -361,9 +360,9 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < valueCapacity * 4; i++) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) { - assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i)); + assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at 
index: " + i); } } @@ -381,12 +380,12 @@ public void testReallocAfterVectorTransfer2() { if (i <= valueCapacity * 4) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2) || (i == valueCapacity * 4)) { - assertFalse("unexpected cleared bit at index: " + i, toVector.isNull(i)); + assertFalse(toVector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } @@ -500,13 +499,13 @@ private void validateRange(int length, int start, int count) { bitVector.allocateNew(length); bitVector.setRangeToOne(start, count); for (int i = 0; i < start; i++) { - Assert.assertTrue(desc + i, bitVector.isNull(i)); + assertTrue(bitVector.isNull(i), desc + i); } for (int i = start; i < start + count; i++) { - Assert.assertEquals(desc + i, 1, bitVector.get(i)); + assertEquals(1, bitVector.get(i), desc + i); } for (int i = start + count; i < length; i++) { - Assert.assertTrue(desc + i, bitVector.isNull(i)); + assertTrue(bitVector.isNull(i), desc + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java index 1da4a4c4914b9..b1ef45c918b72 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java @@ -17,16 +17,16 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.memory.util.MemoryUtil; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestBitVectorHelper { @Test diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java index 056b6bdd2b787..b38e046659669 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; @@ -30,7 +30,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestBufferOwnershipTransfer { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java index 
97de27bec8237..7d4d08636d740 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java @@ -18,9 +18,10 @@ package org.apache.arrow.vector; import static org.apache.arrow.vector.TestUtils.newVector; -import static org.junit.Assert.*; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; @@ -31,9 +32,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.Types.MinorType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /* * Tested field types: @@ -60,12 +61,12 @@ public class TestCopyFrom { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -99,10 +100,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector.getObject(i).toString()); + assertEquals(Integer.toString(i), vector.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -116,10 +114,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -133,10 +128,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } } @@ -171,10 +163,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector.getObject(i).toString()); + assertEquals(Integer.toString(i), vector.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -192,10 +181,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -209,10 +195,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } } @@ -247,7 +230,7 @@ public void testCopyFromWithNulls2() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - 
assertEquals("unexpected value at index: " + i, 1000 + i, vector1.get(i)); + assertEquals(1000 + i, vector1.get(i), "unexpected value at index: " + i); } } @@ -274,7 +257,7 @@ public void testCopyFromWithNulls2() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 1000 + i, vector2.get(i)); + assertEquals(1000 + i, vector2.get(i), "unexpected value at index: " + i); } } } @@ -309,7 +292,7 @@ public void testCopyFromWithNulls3() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector1.get(i)); + assertEquals(10000000000L + (long) i, vector1.get(i), "unexpected value at index: " + i); } } @@ -336,7 +319,7 @@ public void testCopyFromWithNulls3() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector2.get(i)); + assertEquals(10000000000L + (long) i, vector2.get(i), "unexpected value at index: " + i); } } } @@ -450,7 +433,7 @@ public void testCopyFromWithNulls5() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 100.25f + (float) i, vector1.get(i), 0); + assertEquals(100.25f + (float) i, vector1.get(i), 0, "unexpected value at index: " + i); } } @@ -477,7 +460,7 @@ public void testCopyFromWithNulls5() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 100.25f + i * 1.0f, vector2.get(i), 0); + assertEquals(100.25f + i * 1.0f, vector2.get(i), 0, "unexpected value at index: " + i); } } } @@ -512,8 +495,7 @@ public void testCopyFromWithNulls6() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, 123456.7865 + (double) i, vector1.get(i), 0); + assertEquals(123456.7865 + (double) i, vector1.get(i), 0, "unexpected value at index: " + i); } } @@ -540,8 +522,7 @@ public void testCopyFromWithNulls6() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, 123456.7865 + (double) i, vector2.get(i), 0); + assertEquals(123456.7865 + (double) i, vector2.get(i), 0, "unexpected value at index: " + i); } } } @@ -715,7 +696,7 @@ public void testCopyFromWithNulls9() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (short) i, vector1.get(i)); + assertEquals(val + (short) i, vector1.get(i), "unexpected value at index: " + i); } } @@ -742,7 +723,7 @@ public void testCopyFromWithNulls9() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (short) i, vector2.get(i)); + assertEquals(val + (short) i, vector2.get(i), "unexpected value at index: " + i); } } } @@ -778,7 +759,7 @@ public void testCopyFromWithNulls10() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i)); + assertEquals(val + (long) i, vector1.get(i), "unexpected value at index: " + i); } } @@ -805,7 +786,7 @@ public void testCopyFromWithNulls10() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + 
(long) i, vector2.get(i)); + assertEquals(val + (long) i, vector2.get(i), "unexpected value at index: " + i); } } } @@ -841,7 +822,7 @@ public void testCopyFromWithNulls11() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + i, vector1.get(i)); + assertEquals(val + i, vector1.get(i), "unexpected value at index: " + i); } } @@ -868,7 +849,7 @@ public void testCopyFromWithNulls11() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + i, vector2.get(i)); + assertEquals(val + i, vector2.get(i), "unexpected value at index: " + i); } } } @@ -906,7 +887,7 @@ public void testCopyFromWithNulls12() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val, vector1.get(i)); + assertEquals(val, vector1.get(i), "unexpected value at index: " + i); val++; } } @@ -934,7 +915,7 @@ public void testCopyFromWithNulls12() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val, vector2.get(i)); + assertEquals(val, vector2.get(i), "unexpected value at index: " + i); val++; } } @@ -1039,7 +1020,7 @@ public void testCopyFromWithNulls14() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i)); + assertEquals(val + (long) i, vector1.get(i), "unexpected value at index: " + i); } } @@ -1066,7 +1047,7 @@ public void testCopyFromWithNulls14() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i)); + assertEquals(val + (long) i, vector2.get(i), "unexpected value at index: " + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java index fc5dfc38587a4..6886abcc63cdf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.math.BigInteger; @@ -29,9 +29,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestDecimal256Vector { @@ -49,12 +49,12 @@ public class TestDecimal256Vector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -80,7 +80,7 @@ public void testValuesWriteRead() { for (int i = 0; i < intValues.length; i++) { 
BigDecimal value = decimalVector.getObject(i); - assertEquals("unexpected data at index: " + i, values[i], value); + assertEquals(values[i], value, "unexpected data at index: " + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java index 572f13fea1ed1..c7a12fd6ac87c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.math.BigDecimal; import java.math.BigInteger; @@ -29,9 +29,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestDecimalVector { @@ -49,12 +49,12 @@ public class TestDecimalVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -80,7 +80,7 @@ public void testValuesWriteRead() { for (int i = 0; i < intValues.length; i++) { BigDecimal value = decimalVector.getObject(i); - assertEquals("unexpected data at index: " + i, values[i], value); + assertEquals(values[i], value, "unexpected data at index: " + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java index 0621fd4527520..0b74f760d2941 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.HashMap; @@ -47,21 +47,21 @@ import org.apache.arrow.vector.util.JsonStringHashMap; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestDenseUnionVector { private static final String EMPTY_SCHEMA_PATH = ""; private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + 
@AfterEach public void terminate() throws Exception { allocator.close(); } @@ -262,8 +262,8 @@ public void testSplitAndTransfer() throws Exception { /* check the toVector output after doing the splitAndTransfer */ for (int i = 0; i < length; i++) { - assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceVector.getObject(start + i), - toVector.getObject(i)); + assertEquals(sourceVector.getObject(start + i), toVector.getObject(i), + "Different data at indexes: " + (start + i) + " and " + i); } } } @@ -356,7 +356,8 @@ public void testSplitAndTransferWithMixedVectors() throws Exception { /* check the toVector output after doing the splitAndTransfer */ for (int i = 0; i < length; i++) { - assertEquals("Different values at index: " + i, sourceVector.getObject(start + i), toVector.getObject(i)); + assertEquals(sourceVector.getObject(start + i), toVector.getObject(i), + "Different values at index: " + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java index 9ffa79470eeb8..caccc2360e85c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java @@ -19,7 +19,11 @@ import static org.apache.arrow.vector.TestUtils.*; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -49,9 +53,9 @@ import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.Text; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestDictionaryVector { @@ -63,12 +67,12 @@ public class TestDictionaryVector { byte[][] data = new byte[][] {zero, one, two}; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -896,7 +900,7 @@ public void testNoMemoryLeak() { assertEquals("Dictionary encoding not defined for value:" + new Text(two), e.getMessage()); } } - assertEquals("encode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "encode memory leak"); // test no memory leak when decode try (final IntVector indices = newVector(IntVector.class, "", Types.MinorType.INT, allocator); @@ -914,7 +918,7 @@ public void testNoMemoryLeak() { assertEquals("Provided dictionary does not contain value for index 3", e.getMessage()); } } - assertEquals("decode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "decode memory leak"); } @Test @@ -942,7 +946,7 @@ public void testListNoMemoryLeak() { assertEquals("Dictionary encoding not defined for value:20", e.getMessage()); } } - assertEquals("list encode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, 
allocator.getAllocatedMemory(), "list encode memory leak"); try (final ListVector indices = ListVector.empty("indices", allocator); final ListVector dictionaryVector = ListVector.empty("dict", allocator)) { @@ -966,7 +970,7 @@ public void testListNoMemoryLeak() { assertEquals("Provided dictionary does not contain value for index 3", e.getMessage()); } } - assertEquals("list decode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "list decode memory leak"); } @Test @@ -1003,7 +1007,7 @@ public void testStructNoMemoryLeak() { assertEquals("Dictionary encoding not defined for value:baz", e.getMessage()); } } - assertEquals("struct encode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "struct encode memory leak"); try (final StructVector indices = StructVector.empty("indices", allocator); final VarCharVector dictVector1 = new VarCharVector("f0", allocator); @@ -1040,7 +1044,7 @@ public void testStructNoMemoryLeak() { assertEquals("Provided dictionary does not contain value for index 3", e.getMessage()); } } - assertEquals("struct decode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "struct decode memory leak"); } private void testDictionary(Dictionary dictionary, ToIntBiFunction<ValueVector, Integer> valGetter) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java index c5d4d296cc024..6ed44be849726 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; import java.time.Duration; @@ -28,19 +28,19 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestDurationVector { RootAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java index b9cd89e4ad731..4b52c7a41ff07 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java @@ -17,8 +17,12 @@ package org.apache.arrow.vector; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; +import static 
org.junit.jupiter.api.Assertions.assertTrue; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -26,9 +30,9 @@ import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; import org.apache.arrow.vector.util.ReusableByteArray; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestFixedSizeBinaryVector { private static final int numValues = 123; @@ -85,7 +89,7 @@ private static void failWithException(String message) throws Exception { } - @Before + @BeforeEach public void init() throws Exception { allocator = new DirtyRootAllocator(Integer.MAX_VALUE, (byte) 100); vector = new FixedSizeBinaryVector("fixedSizeBinary", allocator, typeWidth); @@ -128,7 +132,7 @@ public void init() throws Exception { largeNullableHolder.buffer = largeBuf; } - @After + @AfterEach public void terminate() throws Exception { for (int i = 0; i < numValues; i++) { bufs[i].close(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java index bde6dd491dd71..54ce8e2ae0e7c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java @@ -17,11 +17,12 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.nio.ByteBuffer; @@ -41,21 +42,20 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestFixedSizeListVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -76,12 +76,12 @@ public void testIntType() { UnionFixedSizeListReader reader = vector.getReader(); for (int i = 0; i < 10; i++) { reader.setPosition(i); - Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); + assertTrue(reader.isSet()); + assertTrue(reader.next()); assertEquals(i, reader.reader().readInteger().intValue()); - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); assertEquals(i + 10, reader.reader().readInteger().intValue()); - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); assertEquals(Arrays.asList(i, i + 10), reader.readObject()); } } @@ -107,16 +107,16 @@ public void testFloatTypeNullable() { for (int i = 0; i < 10; i++) { reader.setPosition(i); if (i % 2 == 0) { - 
Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); + assertTrue(reader.isSet()); + assertTrue(reader.next()); assertEquals(i + 0.1f, reader.reader().readFloat(), 0.00001); - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); assertEquals(i + 10.1f, reader.reader().readFloat(), 0.00001); - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); assertEquals(Arrays.asList(i + 0.1f, i + 10.1f), reader.readObject()); } else { - Assert.assertFalse(reader.isSet()); - Assert.assertNull(reader.readObject()); + assertFalse(reader.isSet()); + assertNull(reader.readObject()); } } } @@ -149,18 +149,18 @@ public void testNestedInList() { reader.setPosition(i); if (i % 2 == 0) { for (int j = 0; j < i % 7; j++) { - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); FieldReader innerListReader = reader.reader(); for (int k = 0; k < 2; k++) { - Assert.assertTrue(innerListReader.next()); + assertTrue(innerListReader.next()); assertEquals(k + j, innerListReader.reader().readInteger().intValue()); } - Assert.assertFalse(innerListReader.next()); + assertFalse(innerListReader.next()); } - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); } else { - Assert.assertFalse(reader.isSet()); - Assert.assertNull(reader.readObject()); + assertFalse(reader.isSet()); + assertNull(reader.readObject()); } } } @@ -196,40 +196,40 @@ public void testTransferPair() { UnionFixedSizeListReader reader = to.getReader(); reader.setPosition(0); - Assert.assertFalse(reader.isSet()); - Assert.assertNull(reader.readObject()); + assertFalse(reader.isSet()); + assertNull(reader.readObject()); reader.setPosition(1); - Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); + assertTrue(reader.isSet()); + assertTrue(reader.next()); assertEquals(0.1f, reader.reader().readFloat(), 0.00001); - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); assertEquals(10.1f, reader.reader().readFloat(), 0.00001); - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); assertEquals(Arrays.asList(0.1f, 10.1f), reader.readObject()); reader.setPosition(2); - Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); + assertTrue(reader.isSet()); + assertTrue(reader.next()); assertEquals(2.1f, reader.reader().readFloat(), 0.00001); - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); assertEquals(12.1f, reader.reader().readFloat(), 0.00001); - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); assertEquals(Arrays.asList(2.1f, 12.1f), reader.readObject()); reader.setPosition(3); - Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); + assertTrue(reader.isSet()); + assertTrue(reader.next()); assertEquals(4.1f, reader.reader().readFloat(), 0.00001); - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); assertEquals(14.1f, reader.reader().readFloat(), 0.00001); - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); assertEquals(Arrays.asList(4.1f, 14.1f), reader.readObject()); for (int i = 4; i < 10; i++) { reader.setPosition(i); - Assert.assertFalse(reader.isSet()); - Assert.assertNull(reader.readObject()); + assertFalse(reader.isSet()); + assertNull(reader.readObject()); } } } @@ -238,11 +238,11 @@ public void testTransferPair() { public void testConsistentChildName() throws Exception { try (FixedSizeListVector listVector = FixedSizeListVector.empty("sourceVector", /*size=*/2, allocator)) { String emptyListStr = listVector.getField().toString(); - 
Assert.assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); + assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); String emptyVectorStr = listVector.getField().toString(); - Assert.assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); + assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); } } @@ -354,27 +354,29 @@ public void testDecimalIndexCheck() throws Exception { } - @Test(expected = IllegalStateException.class) + @Test public void testWriteIllegalData() throws Exception { - try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", /*size=*/3, allocator)) { + assertThrows(IllegalStateException.class, () -> { + try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", /*size=*/3, allocator)) { - UnionFixedSizeListWriter writer1 = vector1.getWriter(); - writer1.allocate(); + UnionFixedSizeListWriter writer1 = vector1.getWriter(); + writer1.allocate(); - int[] values1 = new int[] {1, 2, 3}; - int[] values2 = new int[] {4, 5, 6, 7, 8}; + int[] values1 = new int[]{1, 2, 3}; + int[] values2 = new int[]{4, 5, 6, 7, 8}; - //set some values - writeListVector(vector1, writer1, values1); - writeListVector(vector1, writer1, values2); - writer1.setValueCount(3); + //set some values + writeListVector(vector1, writer1, values1); + writeListVector(vector1, writer1, values2); + writer1.setValueCount(3); - assertEquals(3, vector1.getValueCount()); - int[] realValue1 = convertListToIntArray(vector1.getObject(0)); - assertTrue(Arrays.equals(values1, realValue1)); - int[] realValue2 = convertListToIntArray(vector1.getObject(1)); - assertTrue(Arrays.equals(values2, realValue2)); - } + assertEquals(3, vector1.getValueCount()); + int[] realValue1 = convertListToIntArray(vector1.getObject(0)); + assertTrue(Arrays.equals(values1, realValue1)); + int[] realValue2 = convertListToIntArray(vector1.getObject(1)); + assertTrue(Arrays.equals(values2, realValue2)); + } + }); } @Test diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java index 681897b93c12c..82bf1dd423b5e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java @@ -17,8 +17,7 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; - +import static org.junit.jupiter.api.Assertions.assertEquals; import java.time.Duration; import java.time.Period; @@ -29,20 +28,20 @@ import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestIntervalMonthDayNanoVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java index 4b2ae2eb3d49b..6cb72f38307df 100644 --- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java @@ -17,28 +17,28 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestIntervalYearVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index ffd87c99d508d..d4bb3d4c97bcf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -17,11 +17,11 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Arrays; @@ -39,21 +39,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestLargeListVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -91,11 +90,11 @@ public void testCopyFrom() throws Exception { // assert the output vector is correct FieldReader reader = outVector.getReader(); - Assert.assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); reader.setPosition(1); - Assert.assertFalse("should be null", reader.isSet()); + assertFalse(reader.isSet(), "should be null"); reader.setPosition(2); - Assert.assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); /* index 0 */ @@ -433,15 +432,15 @@ public void testSplitAndTransfer() throws Exception { dataLength2 = (int) toOffsetBuffer.getLong((i + 1) * LargeListVector.OFFSET_WIDTH) - (int) toOffsetBuffer.getLong(i * 
LargeListVector.OFFSET_WIDTH); - assertEquals("Different data lengths at index: " + i + " and start: " + start, - dataLength1, dataLength2); + assertEquals(dataLength1, dataLength2, + "Different data lengths at index: " + i + " and start: " + start); offset1 = (int) offsetBuffer.getLong((start + i) * LargeListVector.OFFSET_WIDTH); offset2 = (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH); for (int j = 0; j < dataLength1; j++) { - assertEquals("Different data at indexes: " + offset1 + " and " + offset2, - dataVector.getObject(offset1), dataVector1.getObject(offset2)); + assertEquals(dataVector.getObject(offset1), dataVector1.getObject(offset2), + "Different data at indexes: " + offset1 + " and " + offset2); offset1++; offset2++; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java index 36607903b01a2..3a51cca51706c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -32,20 +32,20 @@ import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder; import org.apache.arrow.vector.util.ReusableByteArray; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestLargeVarBinaryVector { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java index 06b27a9eba156..aa9c7fed38a6b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java @@ -17,12 +17,14 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import 
java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -41,11 +43,9 @@ import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestLargeVarCharVector { @@ -58,12 +58,12 @@ public class TestLargeVarCharVector { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @After + @AfterEach public void shutdown() { allocator.close(); } @@ -162,7 +162,7 @@ public void testInvalidStartIndex() { final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector); - IllegalArgumentException e = Assertions.assertThrows( + IllegalArgumentException e = assertThrows( IllegalArgumentException.class, () -> tp.splitAndTransfer(valueCount, 10)); @@ -181,7 +181,7 @@ public void testInvalidLength() { final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector); - IllegalArgumentException e = Assertions.assertThrows( + IllegalArgumentException e = assertThrows( IllegalArgumentException.class, () -> tp.splitAndTransfer(0, valueCount * 2)); @@ -298,39 +298,43 @@ public void testSetLastSetUsage() { } } - @Test(expected = OutOfMemoryException.class) + @Test public void testVectorAllocateNew() { - try (RootAllocator smallAllocator = new RootAllocator(200); - LargeVarCharVector vector = new LargeVarCharVector("vec", smallAllocator)) { - vector.allocateNew(); - } + assertThrows(OutOfMemoryException.class, () -> { + try (RootAllocator smallAllocator = new RootAllocator(200); + LargeVarCharVector vector = new LargeVarCharVector("vec", smallAllocator)) { + vector.allocateNew(); + } + }); } - @Test(expected = OversizedAllocationException.class) + @Test public void testLargeVariableVectorReallocation() { - final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator); - // edge case 1: value count = MAX_VALUE_ALLOCATION - final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE; - final int expectedOffsetSize = 10; - try { - vector.allocateNew(expectedAllocationInBytes, 10); - assertTrue(expectedOffsetSize <= vector.getValueCapacity()); - assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity()); - vector.reAlloc(); - assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity()); - assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity()); - } finally { - vector.close(); - } + assertThrows(OversizedAllocationException.class, () -> { + final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator); + // edge case 1: value count = MAX_VALUE_ALLOCATION + final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE; + final int expectedOffsetSize = 10; + try { + vector.allocateNew(expectedAllocationInBytes, 10); + assertTrue(expectedOffsetSize <= vector.getValueCapacity()); + assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity()); + vector.reAlloc(); + assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity()); + assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity()); + } finally { + vector.close(); + } - // common: value count < MAX_VALUE_ALLOCATION - try { - 
vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0); - vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION - vector.reAlloc(); // this tests if it overflows - } finally { - vector.close(); - } + // common: value count < MAX_VALUE_ALLOCATION + try { + vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0); + vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION + vector.reAlloc(); // this tests if it overflows + } finally { + vector.close(); + } + }); } @Test @@ -784,7 +788,7 @@ public void testNullableType() { try { vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); - Assert.fail("Expected out of bounds exception"); + fail("Expected out of bounds exception"); } catch (Exception e) { // ok } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 97f2d9fd6def1..cbcb6cf9d7963 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -17,12 +17,12 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Arrays; @@ -44,21 +44,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestListVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -96,11 +95,11 @@ public void testCopyFrom() throws Exception { // assert the output vector is correct FieldReader reader = outVector.getReader(); - Assert.assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); reader.setPosition(1); - Assert.assertFalse("should be null", reader.isSet()); + assertFalse(reader.isSet(), "should be null"); reader.setPosition(2); - Assert.assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); /* index 0 */ @@ -439,15 +438,15 @@ public void testSplitAndTransfer() throws Exception { dataLength2 = toOffsetBuffer.getInt((i + 1) * ListVector.OFFSET_WIDTH) - toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH); - assertEquals("Different data lengths at index: " + i + " and start: " + start, - dataLength1, dataLength2); + assertEquals(dataLength1, dataLength2, + "Different data lengths at index: " + i + " and start: " + start); offset1 = offsetBuffer.getInt((start + i) * ListVector.OFFSET_WIDTH); offset2 = toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH); 
for (int j = 0; j < dataLength1; j++) { - assertEquals("Different data at indexes: " + offset1 + " and " + offset2, - dataVector.getObject(offset1), dataVector1.getObject(offset2)); + assertEquals(dataVector.getObject(offset1), dataVector1.getObject(offset2), + "Different data at indexes: " + offset1 + " and " + offset2); offset1++; offset2++; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java new file mode 100644 index 0000000000000..e64ed77b1eb9f --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -0,0 +1,1651 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.BaseRepeatedValueVector; +import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; +import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.holders.DurationHolder; +import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class TestListViewVector { + + private BufferAllocator allocator; + + @BeforeEach + public void init() { + allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); + } + + @AfterEach + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testBasicListViewVector() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* write the first list at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(12); + 
listViewWriter.bigInt().writeBigInt(-7); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + /* the second list at index 1 is null (we are not setting any)*/ + + /* write the third list at index 2 */ + listViewWriter.setPosition(2); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(0); + listViewWriter.bigInt().writeBigInt(-127); + listViewWriter.bigInt().writeBigInt(127); + listViewWriter.bigInt().writeBigInt(50); + listViewWriter.endList(); + + /* write the fourth list at index 3 (empty list) */ + listViewWriter.setPosition(3); + listViewWriter.startList(); + listViewWriter.endList(); + + /* write the fifth list at index 4 */ + listViewWriter.setPosition(4); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(1); + listViewWriter.bigInt().writeBigInt(2); + listViewWriter.bigInt().writeBigInt(3); + listViewWriter.bigInt().writeBigInt(4); + listViewWriter.endList(); + + listViewVector.setValueCount(5); + // check value count + assertEquals(5, listViewVector.getValueCount()); + + /* get vector at index 0 -- the value is a BigIntVector*/ + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + final FieldVector dataVec = listViewVector.getDataVector(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check data vector + assertEquals(12, ((BigIntVector) dataVec).get(0)); + assertEquals(-7, ((BigIntVector) dataVec).get(1)); + assertEquals(25, ((BigIntVector) dataVec).get(2)); + assertEquals(0, ((BigIntVector) dataVec).get(3)); + assertEquals(-127, ((BigIntVector) dataVec).get(4)); + assertEquals(127, ((BigIntVector) dataVec).get(5)); + assertEquals(50, ((BigIntVector) dataVec).get(6)); + assertEquals(1, ((BigIntVector) dataVec).get(7)); + assertEquals(2, ((BigIntVector) dataVec).get(8)); + assertEquals(3, ((BigIntVector) dataVec).get(9)); + assertEquals(4, ((BigIntVector) dataVec).get(10)); + + listViewVector.validate(); + } + } + + @Test + public void testImplicitNullVectors() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + /* allocate memory */ + listViewWriter.allocate(); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + /* write the first list at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(-7); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + int offSet0 = offSetBuffer.getInt(0 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size0 = sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH); + + // after the first list is written, + // the initial offset must be 0, + // the size must be 3 (as there are 3 elements in the array), + // and the lastSet must be 0 since the first list is written at index 0. + + assertEquals(0, offSet0); + assertEquals(3, size0); + + listViewWriter.setPosition(5); + listViewWriter.startList(); + + // Write the 6th list at index 5; the lists at indexes 1 through 4 are not populated. + // Because of this gap between index 0 and index 5, the offset and size buffers + // must still account for the skipped positions as implicit nulls. + + for (int i = 1; i < 5; i++) { + int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH); + // Since these lists were never written, both the offset and the size must be 0; + // the lastSet position is the one currently being written, i.e. 5. + assertEquals(0, offSet); + assertEquals(0, size); + } + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + int offSet5 = offSetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size5 = sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH); + + assertEquals(3, offSet5); + assertEquals(2, size5); + + listViewWriter.setPosition(10); + listViewWriter.startList(); + + // Write the 11th list at index 10; the lists at indexes 6 through 9 are not populated. + // Because of this gap between index 5 and index 10, the offset and size buffers + // must still account for the skipped positions as implicit nulls. + for (int i = 6; i < 10; i++) { + int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH); + // Since these lists were never written, both the offset and the size must be 0; + // the lastSet position is the one currently being written, i.e. 10. + assertEquals(0, offSet); + assertEquals(0, size); + } + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.endList(); + + int offSet11 = offSetBuffer.getInt(10 * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size11 = sizeBuffer.getInt(10 * BaseRepeatedValueViewVector.SIZE_WIDTH); + + assertEquals(5, offSet11); + assertEquals(1, size11); + + listViewVector.setValueCount(11); + + listViewVector.validate(); + } + } + + @Test + public void testNestedListViewVector() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* the dataVector that backs a listVector will also be a + * listVector for this test. 
+ */ + + /* write one or more inner lists at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(50); + listViewWriter.list().bigInt().writeBigInt(100); + listViewWriter.list().bigInt().writeBigInt(200); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(75); + listViewWriter.list().bigInt().writeBigInt(125); + listViewWriter.list().bigInt().writeBigInt(150); + listViewWriter.list().bigInt().writeBigInt(175); + listViewWriter.list().endList(); + + listViewWriter.endList(); + + /* write one or more inner lists at index 1 */ + listViewWriter.setPosition(1); + listViewWriter.startList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(10); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(15); + listViewWriter.list().bigInt().writeBigInt(20); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(25); + listViewWriter.list().bigInt().writeBigInt(30); + listViewWriter.list().bigInt().writeBigInt(35); + listViewWriter.list().endList(); + + listViewWriter.endList(); + + listViewVector.setValueCount(2); + + // [[[50,100,200],[75,125,150,175]], [[10],[15,20],[25,30,35]]] + + assertEquals(2, listViewVector.getValueCount()); + + /* get listViewVector value at index 0 -- the value itself is a listViewVector */ + Object result = listViewVector.getObject(0); + ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result; + ArrayList<Long> list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(50), list.get(0)); + assertEquals(Long.valueOf(100), list.get(1)); + assertEquals(Long.valueOf(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(75), list.get(0)); + assertEquals(Long.valueOf(125), list.get(1)); + assertEquals(Long.valueOf(150), list.get(2)); + assertEquals(Long.valueOf(175), list.get(3)); + + /* get listViewVector value at index 1 -- the value itself is a listViewVector */ + result = listViewVector.getObject(1); + resultSet = (ArrayList<ArrayList<Long>>) result; + + assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ + assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(10), list.get(0)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(15), list.get(0)); + assertEquals(Long.valueOf(20), list.get(1)); + + list = resultSet.get(2); + assertEquals(Long.valueOf(25), list.get(0)); + assertEquals(Long.valueOf(30), list.get(1)); + assertEquals(Long.valueOf(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listViewVector.isNull(0)); + assertFalse(listViewVector.isNull(1)); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offSetBuffer.getInt(1 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + listViewVector.validate(); + } + } + + @Test + public void testNestedListVector() throws Exception { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + + MinorType listType = MinorType.LISTVIEW; + MinorType scalarType = MinorType.BIGINT; + + listViewVector.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList1 = (ListViewVector) listViewVector.getDataVector(); + innerList1.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList2 = (ListViewVector) innerList1.getDataVector(); + innerList2.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList3 = (ListViewVector) innerList2.getDataVector(); + innerList3.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList4 = (ListViewVector) innerList3.getDataVector(); + innerList4.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList5 = (ListViewVector) innerList4.getDataVector(); + innerList5.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList6 = (ListViewVector) innerList5.getDataVector(); + innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); + + listViewVector.setInitialCapacity(128); + + listViewVector.validate(); + } + } + + private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) { + for (int i = 0; i < bufValues.length; i++) { + buffer.setInt(i * bufWidth, bufValues[i]); + } + } + + /* + * Setting up the buffers directly needs to be validated with the base method used in + * the ListVector class where we use the approach of startList(), + * write to the child vector and endList(). + *
    + * To support this, we have to consider the following scenarios; + *
    + * 1. Only using directly buffer-based inserts. + * 2. Default list insertion followed by buffer-based inserts. + * 3. Buffer-based inserts followed by default list insertion. + */ + + /* Setting up buffers directly would require the following steps to be taken + * 0. Allocate buffers in listViewVector by calling `allocateNew` method. + * 1. Initialize the child vector using `initializeChildrenFromFields` method. + * 2. Set values in the child vector. + * 3. Set validity, offset and size buffers using `setValidity`, + * `setOffset` and `setSize` methods. + * 4. Set value count using `setValueCount` method. + */ + @Test + public void testBasicListViewSet() { + + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; + childVector.allocateNew(7); + + childVector.set(0, 12); + childVector.set(1, -7); + childVector.set(2, 25); + childVector.set(3, 0); + childVector.set(4, -127); + childVector.set(5, 127); + childVector.set(6, 50); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 3); + listViewVector.setOffset(2, 3); + listViewVector.setOffset(3, 7); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + + // Set value count using `setValueCount` method. 
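+ // With the buffers set up above, the logical value of the vector is + // [[12,-7,25], null, [0,-127,127,50], []]: each (offset, size) pair selects + // a slice of the child vector, and index 1 reads as null because its + // validity bit is 0.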
+ listViewVector.setValueCount(4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + + listViewVector.validate(); + } + } + + @Test + public void testBasicListViewSetNested() { + // Expected listview + // [[[50,100,200],[75,125,150,175]],[[10],[15,20],[25,30,35]]] + + // Setting child vector + // [[50,100,200],[75,125,150,175],[10],[15,20],[25,30,35]] + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.List(), + null, null); + FieldType childFieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field childField = new Field("child-vector", childFieldType, null); + List<Field> children = new ArrayList<>(); + children.add(childField); + Field field = new Field("child-vector", fieldType, children); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector.
+ FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + ListVector childVector = (ListVector) fieldVector; + UnionListWriter listWriter = childVector.getWriter(); + listWriter.allocate(); + + listWriter.setPosition(0); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(50); + listWriter.bigInt().writeBigInt(100); + listWriter.bigInt().writeBigInt(200); + + listWriter.endList(); + + listWriter.setPosition(1); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(75); + listWriter.bigInt().writeBigInt(125); + listWriter.bigInt().writeBigInt(150); + listWriter.bigInt().writeBigInt(175); + + listWriter.endList(); + + listWriter.setPosition(2); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(10); + + listWriter.endList(); + + listWriter.startList(); + listWriter.setPosition(3); + + listWriter.bigInt().writeBigInt(15); + listWriter.bigInt().writeBigInt(20); + + listWriter.endList(); + + listWriter.startList(); + listWriter.setPosition(4); + + listWriter.bigInt().writeBigInt(25); + listWriter.bigInt().writeBigInt(30); + listWriter.bigInt().writeBigInt(35); + + listWriter.endList(); + + childVector.setValueCount(5); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 1); + + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 2); + + listViewVector.setSize(0, 2); + listViewVector.setSize(1, 3); + + // Set value count using `setValueCount` method. + listViewVector.setValueCount(2); + + assertEquals(2, listViewVector.getValueCount()); + + /* get listViewVector value at index 0 -- the value itself is a listViewVector */ + Object result = listViewVector.getObject(0); + ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result; + ArrayList<Long> list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(50), list.get(0)); + assertEquals(Long.valueOf(100), list.get(1)); + assertEquals(Long.valueOf(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(75), list.get(0)); + assertEquals(Long.valueOf(125), list.get(1)); + assertEquals(Long.valueOf(150), list.get(2)); + assertEquals(Long.valueOf(175), list.get(3)); + + /* get listViewVector value at index 1 -- the value itself is a listViewVector */ + result = listViewVector.getObject(1); + resultSet = (ArrayList<ArrayList<Long>>) result; + + assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ + assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(10), list.get(0)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(15), list.get(0)); + assertEquals(Long.valueOf(20), list.get(1)); + + list = resultSet.get(2); + assertEquals(Long.valueOf(25), list.get(0)); + assertEquals(Long.valueOf(30), list.get(1)); + assertEquals(Long.valueOf(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listViewVector.isNull(0)); + assertFalse(listViewVector.isNull(1)); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + 
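+ // The child vector here is itself a ListVector holding the five inner lists, + // so (offset 0, size 2) selects inner lists 0-1 for index 0 and + // (offset 2, size 3) selects inner lists 2-4 for index 1, producing the + // nested value shown above.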
+ // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + listViewVector.validate(); + } + } + + @Test + public void testBasicListViewSetWithListViewWriter() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; + childVector.allocateNew(7); + + childVector.set(0, 12); + childVector.set(1, -7); + childVector.set(2, 25); + childVector.set(3, 0); + childVector.set(4, -127); + childVector.set(5, 127); + childVector.set(6, 50); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 3); + listViewVector.setOffset(2, 3); + listViewVector.setOffset(3, 7); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + + // Set value count using `setValueCount` method. 
+ listViewVector.setValueCount(4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + listViewWriter.setPosition(4); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(121); + listViewWriter.bigInt().writeBigInt(-71); + listViewWriter.bigInt().writeBigInt(251); + listViewWriter.endList(); + + listViewVector.setValueCount(5); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + assertEquals(121, ((BigIntVector) listViewVector.getDataVector()).get(7)); + assertEquals(-71, ((BigIntVector) listViewVector.getDataVector()).get(8)); + assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); + + listViewVector.validate(); + } + } + + @Test + public void testGetBufferAddress() throws 
Exception { + try (ListViewVector listViewVector = ListViewVector.empty("vector", allocator)) { + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + boolean error = false; + + listViewWriter.allocate(); + + listViewWriter.setPosition(0); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(50); + listViewWriter.bigInt().writeBigInt(100); + listViewWriter.bigInt().writeBigInt(200); + listViewWriter.endList(); + + listViewWriter.setPosition(1); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(250); + listViewWriter.bigInt().writeBigInt(300); + listViewWriter.endList(); + + listViewVector.setValueCount(2); + + /* check listVector contents */ + Object result = listViewVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(50), resultSet.get(0)); + assertEquals(Long.valueOf(100), resultSet.get(1)); + assertEquals(Long.valueOf(200), resultSet.get(2)); + + result = listViewVector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(250), resultSet.get(0)); + assertEquals(Long.valueOf(300), resultSet.get(1)); + + List buffers = listViewVector.getFieldBuffers(); + + long bitAddress = listViewVector.getValidityBufferAddress(); + long offsetAddress = listViewVector.getOffsetBufferAddress(); + long sizeAddress = listViewVector.getSizeBufferAddress(); + + try { + listViewVector.getDataBufferAddress(); + } catch (UnsupportedOperationException ue) { + error = true; + } finally { + assertTrue(error); + } + + assertEquals(3, buffers.size()); + assertEquals(bitAddress, buffers.get(0).memoryAddress()); + assertEquals(offsetAddress, buffers.get(1).memoryAddress()); + assertEquals(sizeAddress, buffers.get(2).memoryAddress()); + + /* (3+2)/2 */ + assertEquals(2.5, listViewVector.getDensity(), 0); + listViewVector.validate(); + } + } + + @Test + public void testConsistentChildName() throws Exception { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + String emptyListStr = listViewVector.getField().toString(); + assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); + + listViewVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + String emptyVectorStr = listViewVector.getField().toString(); + assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); + } + } + + @Test + public void testSetInitialCapacity() { + try (final ListViewVector vector = ListViewVector.empty("", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + + vector.setInitialCapacity(512); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512); + + vector.setInitialCapacity(512, 4); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4); + + vector.setInitialCapacity(512, 0.1); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 51); + + vector.setInitialCapacity(512, 0.01); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 5); + + vector.setInitialCapacity(5, 0.1); + vector.allocateNew(); + assertEquals(8, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 1); + + vector.validate(); + } + } + + @Test + 
public void testClearAndReuse() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + BigIntVector bigIntVector = + (BigIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector(); + vector.setInitialCapacity(10); + vector.allocateNew(); + + vector.startNewValue(0); + bigIntVector.setSafe(0, 7); + vector.endValue(0, 1); + vector.startNewValue(1); + bigIntVector.setSafe(1, 8); + vector.endValue(1, 1); + vector.setValueCount(2); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(Long.valueOf(7), resultSet.get(0)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(8), resultSet.get(0)); + + // Clear and release the buffers to trigger a realloc when adding next value + vector.clear(); + + // The list vector should reuse a buffer when reallocating the offset buffer + vector.startNewValue(0); + bigIntVector.setSafe(0, 7); + vector.endValue(0, 1); + vector.startNewValue(1); + bigIntVector.setSafe(1, 8); + vector.endValue(1, 1); + vector.setValueCount(2); + + result = vector.getObject(0); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(7), resultSet.get(0)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(8), resultSet.get(0)); + + vector.validate(); + } + } + + @Test + public void testWriterGetField() { + // adopted from ListVector test cases + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endList(); + vector.setValueCount(2); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Int(32, true)), null); + Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + + vector.validate(); + } + } + + @Test + public void testWriterUsingHolderGetTimestampMilliTZField() { + // adopted from ListVector test cases + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); + writer.allocate(); + + TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); + holder.timezone = "SomeFakeTimeZone"; + writer.startList(); + holder.value = 12341234L; + writer.timeStampMilliTZ().write(holder); + holder.value = 55555L; + writer.timeStampMilliTZ().write(holder); + + // Writing with a different timezone should throw + holder.timezone = "AsdfTimeZone"; + holder.value = 77777; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> writer.timeStampMilliTZ().write(holder)); + assertEquals( + "holder.timezone: AsdfTimeZone not equal to vector timezone: SomeFakeTimeZone", + ex.getMessage()); + + writer.endList(); + vector.setValueCount(1); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "SomeFakeTimeZone")), null); + Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + + vector.validate(); + } + } + + @Test + public void 
testWriterGetDurationField() { + // adopted from ListVector test cases + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); + writer.allocate(); + + DurationHolder durationHolder = new DurationHolder(); + durationHolder.unit = TimeUnit.MILLISECOND; + + writer.startList(); + durationHolder.value = 812374L; + writer.duration().write(durationHolder); + durationHolder.value = 143451L; + writer.duration().write(durationHolder); + + // Writing with a different unit should throw + durationHolder.unit = TimeUnit.SECOND; + durationHolder.value = 8888888; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> writer.duration().write(durationHolder)); + assertEquals( + "holder.unit: SECOND not equal to vector unit: MILLISECOND", ex.getMessage()); + + writer.endList(); + vector.setValueCount(1); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), null); + Field expectedField = new Field(vector.getName(), + FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + + vector.validate(); + } + } + + @Test + public void testClose() throws Exception { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endList(); + vector.setValueCount(2); + + assertTrue(vector.getBufferSize() > 0); + assertTrue(vector.getDataVector().getBufferSize() > 0); + + writer.close(); + assertEquals(0, vector.getBufferSize()); + assertEquals(0, vector.getDataVector().getBufferSize()); + + vector.validate(); + } + } + + @Test + public void testGetBufferSizeFor() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writeIntValues(writer, new int[] {1, 2}); + writeIntValues(writer, new int[] {3, 4}); + writeIntValues(writer, new int[] {5, 6}); + writeIntValues(writer, new int[] {7, 8, 9, 10}); + writeIntValues(writer, new int[] {11, 12, 13, 14}); + writer.setValueCount(5); + + IntVector dataVector = (IntVector) vector.getDataVector(); + int[] indices = new int[] {0, 2, 4, 6, 10, 14}; + + for (int valueCount = 1; valueCount <= 5; valueCount++) { + int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount); + int offsetBufferSize = valueCount * BaseRepeatedValueViewVector.OFFSET_WIDTH; + int sizeBufferSize = valueCount * BaseRepeatedValueViewVector.SIZE_WIDTH; + + int expectedSize = validityBufferSize + offsetBufferSize + sizeBufferSize + + dataVector.getBufferSizeFor(indices[valueCount]); + assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); + } + vector.validate(); + } + } + + @Test + public void testIsEmpty() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + // set values [1,2], null, [], [5,6] + writeIntValues(writer, new int[] {1, 2}); + writer.setPosition(2); + writeIntValues(writer, new int[] {}); + writeIntValues(writer, new int[] {5, 6}); + writer.setValueCount(4); + + assertFalse(vector.isEmpty(0)); + 
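+ // Index 1 was never written, so it is null, and a null entry also reports + // empty; index 2 was explicitly written as [], so it is empty but not null.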
assertTrue(vector.isNull(1)); + assertTrue(vector.isEmpty(1)); + assertFalse(vector.isNull(2)); + assertTrue(vector.isEmpty(2)); + assertFalse(vector.isEmpty(3)); + + vector.validate(); + } + } + + @Test + public void testTotalCapacity() { + // adopted from ListVector test cases + final FieldType type = FieldType.nullable(MinorType.INT.getType()); + try (final ListViewVector vector = new ListViewVector("listview", allocator, type, null)) { + // Force the child vector to be allocated based on the type + // (this is a bad API: we have to track and repeat the type twice) + vector.addOrGetVector(type); + + // Specify the allocation size but do not allocate + vector.setInitialTotalCapacity(10, 100); + + // Finally, actually do the allocation + vector.allocateNewSafe(); + + // Note: allocator rounds up and can be greater than the requested allocation. + assertTrue(vector.getValueCapacity() >= 10); + assertTrue(vector.getDataVector().getValueCapacity() >= 100); + + vector.validate(); + } + } + + @Test + public void testSetNull1() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.endList(); + + vector.setNull(1); + + writer.setPosition(2); + writer.startList(); + writer.bigInt().writeBigInt(30); + writer.bigInt().writeBigInt(40); + writer.endList(); + + vector.setNull(3); + vector.setNull(4); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(50); + writer.bigInt().writeBigInt(60); + writer.endList(); + + vector.setValueCount(6); + + assertFalse(vector.isNull(0)); + assertTrue(vector.isNull(1)); + assertFalse(vector.isNull(2)); + assertTrue(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(1, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(4, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(0); + ArrayList resultSet 
= (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + + result = vector.getObject(2); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(30), resultSet.get(0)); + assertEquals(Long.valueOf(40), resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(50), resultSet.get(0)); + assertEquals(Long.valueOf(60), resultSet.get(1)); + + vector.validate(); + } + } + + @Test + public void testSetNull2() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + // validate setting nulls first and then writing values + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + vector.setNull(0); + vector.setNull(2); + vector.setNull(4); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(3); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.bigInt().writeBigInt(80); + writer.endList(); + + vector.setValueCount(6); + + assertTrue(vector.isNull(0)); + assertFalse(vector.isNull(1)); + assertTrue(vector.isNull(2)); + assertFalse(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(1); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + assertEquals(Long.valueOf(30), resultSet.get(2)); + + result = vector.getObject(3); + resultSet 
= (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(40), resultSet.get(0)); + assertEquals(Long.valueOf(50), resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + assertEquals(Long.valueOf(80), resultSet.get(2)); + + vector.validate(); + } + } + + @Test + public void testSetNull3() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + // validate setting values first and then writing nulls + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(3); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.bigInt().writeBigInt(80); + writer.endList(); + + vector.setNull(0); + vector.setNull(2); + vector.setNull(4); + + vector.setValueCount(6); + + assertTrue(vector.isNull(0)); + assertFalse(vector.isNull(1)); + assertTrue(vector.isNull(2)); + assertFalse(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(1); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + assertEquals(Long.valueOf(30), resultSet.get(2)); + + result = vector.getObject(3); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(40), resultSet.get(0)); + assertEquals(Long.valueOf(50), 
resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + assertEquals(Long.valueOf(80), resultSet.get(2)); + + vector.validate(); + } + } + + @Test + public void testOverWrite1() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + vector.setValueCount(2); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(80); + writer.bigInt().writeBigInt(90); + writer.endList(); + + vector.setValueCount(2); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(80), resultSet.get(0)); + assertEquals(Long.valueOf(90), resultSet.get(1)); + + vector.validate(); + } + } + + @Test + public void testOverwriteWithNull() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + vector.setValueCount(2); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setNull(0); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setNull(1); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + assertTrue(vector.isNull(0)); + assertTrue(vector.isNull(1)); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.endList(); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(80); + writer.bigInt().writeBigInt(90); + writer.endList(); + + assertEquals(2, offsetBuffer.getInt(1 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setValueCount(2); + + assertFalse(vector.isNull(0)); + assertFalse(vector.isNull(1)); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(80), resultSet.get(0)); + assertEquals(Long.valueOf(90), resultSet.get(1)); + + vector.validate(); + } + } + + @Test + public void testOutOfOrderOffset1() { + // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + + FieldType fieldType = new FieldType(true, new ArrowType.Int(16, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + SmallIntVector childVector = (SmallIntVector) fieldVector; + + childVector.allocateNew(7); + + childVector.set(0, 0); + childVector.set(1, -127); + childVector.set(2, 127); + childVector.set(3, 50); + childVector.set(4, 12); + childVector.set(5, -7); + childVector.set(6, 25); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + listViewVector.setValidity(4, 1); + + listViewVector.setOffset(0, 4); + listViewVector.setOffset(1, 7); + listViewVector.setOffset(2, 0); + listViewVector.setOffset(3, 0); + listViewVector.setOffset(4, 3); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + listViewVector.setSize(4, 2); + + // Set value count using `setValueCount` method. 
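+ // Unlike ListVector offsets, ListView offsets need not be ordered or + // disjoint: with child values [0,-127,127,50,12,-7,25], (offset 4, size 3) + // yields [12,-7,25] at index 0, (offset 0, size 4) yields [0,-127,127,50] at + // index 2, and (offset 3, size 2) yields [50,12] at index 4, which overlaps + // the view at index 2.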
+ listViewVector.setValueCount(5); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(4, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check child vector + assertEquals(0, ((SmallIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-127, ((SmallIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(127, ((SmallIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(50, ((SmallIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(12, ((SmallIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(-7, ((SmallIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(25, ((SmallIntVector) listViewVector.getDataVector()).get(6)); + + // check values + Object result = listViewVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Short.valueOf("12"), resultSet.get(0)); + assertEquals(Short.valueOf("-7"), resultSet.get(1)); + assertEquals(Short.valueOf("25"), resultSet.get(2)); + + assertTrue(listViewVector.isNull(1)); + + result = listViewVector.getObject(2); + resultSet = (ArrayList) result; + assertEquals(4, resultSet.size()); + assertEquals(Short.valueOf("0"), resultSet.get(0)); + assertEquals(Short.valueOf("-127"), resultSet.get(1)); + assertEquals(Short.valueOf("127"), resultSet.get(2)); + assertEquals(Short.valueOf("50"), resultSet.get(3)); + + assertTrue(listViewVector.isEmpty(3)); + + result = listViewVector.getObject(4); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Short.valueOf("50"), resultSet.get(0)); + assertEquals(Short.valueOf("12"), resultSet.get(1)); + + listViewVector.validate(); + } + } + + private void writeIntValues(UnionListViewWriter writer, int[] values) { + writer.startList(); + for (int v: values) { + writer.integer().writeInt(v); + } + writer.endList(); + } + +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 43f4c3b536fdc..3ffbcc29c9e59 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -17,13 +17,15 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static 
org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.Map; import org.apache.arrow.memory.ArrowBuf; @@ -37,23 +39,24 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestMapVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -90,7 +93,7 @@ public void testBasicOperation() { mapReader.setPosition(i); for (int j = 0; j < i + 1; j++) { mapReader.next(); - assertEquals("record: " + i, j, mapReader.key().readLong().longValue()); + assertEquals(j, mapReader.key().readLong().longValue(), "record: " + i); assertEquals(j, mapReader.value().readInteger().intValue()); } } @@ -136,7 +139,7 @@ public void testBasicOperationNulls() { } else { for (int j = 0; j < i + 1; j++) { mapReader.next(); - assertEquals("record: " + i, j, mapReader.key().readLong().longValue()); + assertEquals(j, mapReader.key().readLong().longValue(), "record: " + i); if (i == 5) { assertFalse(mapReader.value().isSet()); } else { @@ -194,11 +197,11 @@ public void testCopyFrom() throws Exception { // assert the output vector is correct FieldReader reader = outVector.getReader(); - assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); reader.setPosition(1); - assertFalse("should be null", reader.isSet()); + assertFalse(reader.isSet(), "should be null"); reader.setPosition(2); - assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); /* index 0 */ @@ -460,15 +463,15 @@ public void testSplitAndTransfer() throws Exception { dataLength2 = toOffsetBuffer.getInt((i + 1) * MapVector.OFFSET_WIDTH) - toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH); - assertEquals("Different data lengths at index: " + i + " and start: " + start, - dataLength1, dataLength2); + assertEquals(dataLength1, dataLength2, + "Different data lengths at index: " + i + " and start: " + start); offset1 = offsetBuffer.getInt((start + i) * MapVector.OFFSET_WIDTH); offset2 = toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH); for (int j = 0; j < dataLength1; j++) { - assertEquals("Different data at indexes: " + offset1 + " and " + offset2, - dataVector.getObject(offset1), dataVector1.getObject(offset2)); + assertEquals(dataVector.getObject(offset1), dataVector1.getObject(offset2), + "Different data at indexes: " + offset1 + " and " + offset2); offset1++; offset2++; @@ -1178,4 +1181,21 @@ public void testGetTransferPairWithFieldAndCallBack() { toVector.clear(); } } + + @Test + public void testMakeTransferPairPreserveNullability() { + Field intField = new Field("int", 
FieldType.notNullable(MinorType.INT.getType()), null); + List fields = Collections.singletonList(intField); + Field structField = new Field("struct", FieldType.notNullable(ArrowType.Struct.INSTANCE), fields); + Field structField2 = new Field("struct", FieldType.notNullable(ArrowType.Struct.INSTANCE), fields); + FieldVector vec = structField.createVector(allocator); + + TransferPair tp = vec.getTransferPair(structField2, allocator); + tp.transfer(); + + FieldVector res = (FieldVector) tp.getTo(); + + assertEquals(intField, vec.getField().getChildren().get(0)); + assertEquals(intField, res.getField().getChildren().get(0)); + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java b/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java index f1345e88ab8b9..51ad470bb6417 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java @@ -17,11 +17,13 @@ package org.apache.arrow.vector; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.lang.reflect.Field; import java.net.URLClassLoader; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Test cases for {@link NullCheckingForGet}. @@ -63,7 +65,7 @@ public void testDefaultValue() throws Exception { ClassLoader classLoader = copyClassLoader(); if (classLoader != null) { boolean nullCheckingEnabled = getFlagValue(classLoader); - Assert.assertTrue(nullCheckingEnabled); + assertTrue(nullCheckingEnabled); } } @@ -79,7 +81,7 @@ public void testEnableSysProperty() throws Exception { ClassLoader classLoader = copyClassLoader(); if (classLoader != null) { boolean nullCheckingEnabled = getFlagValue(classLoader); - Assert.assertFalse(nullCheckingEnabled); + assertFalse(nullCheckingEnabled); } // restore system property diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java index 7f26b5c1b79f6..200786f54a92d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java @@ -17,12 +17,14 @@ package org.apache.arrow.vector; +import static org.junit.jupiter.api.Assertions.assertThrows; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.memory.RootAllocator; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * This class tests cases where we expect to receive {@link OutOfMemoryException}. 
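The hunks below migrate JUnit 4's annotation-based exception expectation to JUnit 5's explicit assertThrows. As a minimal sketch of the pattern being applied (drawn from the patch itself and shown standalone for clarity):

    // JUnit 4 declared the expected exception on the annotation:
    //   @Test(expected = OutOfMemoryException.class)
    // JUnit 5 wraps the code under test in an explicit assertion instead:
    @Test
    public void variableWidthVectorAllocateNew() {
      assertThrows(OutOfMemoryException.class, () -> {
        try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
          vector.allocateNew(); // the 200-byte allocator limit forces the failure
        }
      });
    }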
@@ -33,40 +35,48 @@ public class TestOutOfMemoryForValueVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(200); // Start with low memory limit } - @Test(expected = OutOfMemoryException.class) + @Test public void variableWidthVectorAllocateNew() { - try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(); - } + assertThrows(OutOfMemoryException.class, () -> { + try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(); + } + }); } - @Test(expected = OutOfMemoryException.class) + @Test public void variableWidthVectorAllocateNewCustom() { - try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(2342, 234); - } + assertThrows(OutOfMemoryException.class, () -> { + try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(2342, 234); + } + }); } - @Test(expected = OutOfMemoryException.class) + @Test public void fixedWidthVectorAllocateNew() { - try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(); - } + assertThrows(OutOfMemoryException.class, () -> { + try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(); + } + }); } - @Test(expected = OutOfMemoryException.class) + @Test public void fixedWidthVectorAllocateNewCustom() { - try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(2342); - } + assertThrows(OutOfMemoryException.class, () -> { + try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(2342); + } + }); } - @After + @AfterEach public void terminate() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java index 23414e9f5df1c..f89828e4ceeb2 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java @@ -18,15 +18,16 @@ package org.apache.arrow.vector; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.util.OversizedAllocationException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * This class tests that OversizedAllocationException occurs when a large memory is allocated for a vector. 
@@ -39,94 +40,100 @@ public class TestOversizedAllocationForValueVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } - @Test(expected = OversizedAllocationException.class) + @Test public void testFixedVectorReallocation() { - final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator); - // edge case 1: buffer size = max value capacity - final int expectedValueCapacity = checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 4); - try { - vector.allocateNew(expectedValueCapacity); - assertEquals(expectedValueCapacity, vector.getValueCapacity()); - vector.reAlloc(); - assertEquals(expectedValueCapacity * 2, vector.getValueCapacity()); - } finally { - vector.close(); - } + assertThrows(OversizedAllocationException.class, () -> { + final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator); + // edge case 1: buffer size = max value capacity + final int expectedValueCapacity = checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 4); + try { + vector.allocateNew(expectedValueCapacity); + assertEquals(expectedValueCapacity, vector.getValueCapacity()); + vector.reAlloc(); + assertEquals(expectedValueCapacity * 2, vector.getValueCapacity()); + } finally { + vector.close(); + } - // common case: value count < max value capacity - try { - vector.allocateNew(checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 8)); - vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION - vector.reAlloc(); // this should throw an IOOB - } finally { - vector.close(); - } + // common case: value count < max value capacity + try { + vector.allocateNew(checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 8)); + vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION + vector.reAlloc(); // this should throw an IOOB + } finally { + vector.close(); + } + }); } - @Test(expected = OversizedAllocationException.class) + @Test public void testBitVectorReallocation() { - final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - // edge case 1: buffer size ~ max value capacity - final int expectedValueCapacity = 1 << 29; - try { - vector.allocateNew(expectedValueCapacity); - assertEquals(expectedValueCapacity, vector.getValueCapacity()); - vector.reAlloc(); - assertEquals(expectedValueCapacity * 2, vector.getValueCapacity()); - } finally { - vector.close(); - } + assertThrows(OversizedAllocationException.class, () -> { + final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator); + // edge case 1: buffer size ~ max value capacity + final int expectedValueCapacity = 1 << 29; + try { + vector.allocateNew(expectedValueCapacity); + assertEquals(expectedValueCapacity, vector.getValueCapacity()); + vector.reAlloc(); + assertEquals(expectedValueCapacity * 2, vector.getValueCapacity()); + } finally { + vector.close(); + } - // common: value count < MAX_VALUE_ALLOCATION - try { - vector.allocateNew(expectedValueCapacity); - for (int i = 0; i < 3; i++) { - vector.reAlloc(); // expand buffer size + // common: value count < MAX_VALUE_ALLOCATION + try { + vector.allocateNew(expectedValueCapacity); + for (int i = 0; i < 3; i++) { + vector.reAlloc(); // expand buffer size + } + assertEquals(Integer.MAX_VALUE, vector.getValueCapacity()); + vector.reAlloc(); // buffer size ~ max allocation + assertEquals(Integer.MAX_VALUE, vector.getValueCapacity()); + vector.reAlloc(); // overflow + } finally 

-  @Test(expected = OversizedAllocationException.class)
+  @Test
   public void testBitVectorReallocation() {
-    final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
-    // edge case 1: buffer size ~ max value capacity
-    final int expectedValueCapacity = 1 << 29;
-    try {
-      vector.allocateNew(expectedValueCapacity);
-      assertEquals(expectedValueCapacity, vector.getValueCapacity());
-      vector.reAlloc();
-      assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
-    } finally {
-      vector.close();
-    }
+    assertThrows(OversizedAllocationException.class, () -> {
+      final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+      // edge case 1: buffer size ~ max value capacity
+      final int expectedValueCapacity = 1 << 29;
+      try {
+        vector.allocateNew(expectedValueCapacity);
+        assertEquals(expectedValueCapacity, vector.getValueCapacity());
+        vector.reAlloc();
+        assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
+      } finally {
+        vector.close();
+      }

-    // common: value count < MAX_VALUE_ALLOCATION
-    try {
-      vector.allocateNew(expectedValueCapacity);
-      for (int i = 0; i < 3; i++) {
-        vector.reAlloc(); // expand buffer size
+      // common: value count < MAX_VALUE_ALLOCATION
+      try {
+        vector.allocateNew(expectedValueCapacity);
+        for (int i = 0; i < 3; i++) {
+          vector.reAlloc(); // expand buffer size
+        }
+        assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
+        vector.reAlloc(); // buffer size ~ max allocation
+        assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
+        vector.reAlloc(); // overflow
+      } finally {
+        vector.close();
       }
-      assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
-      vector.reAlloc(); // buffer size ~ max allocation
-      assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
-      vector.reAlloc(); // overflow
-    } finally {
-      vector.close();
-    }
+    });
   }

-  @Test(expected = OversizedAllocationException.class)
+  @Test
   public void testVariableVectorReallocation() {
-    final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
-    // edge case 1: value count = MAX_VALUE_ALLOCATION
-    final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
-    final int expectedOffsetSize = 10;
-    try {
-      vector.allocateNew(expectedAllocationInBytes, 10);
-      assertTrue(expectedOffsetSize <= vector.getValueCapacity());
-      assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
-      vector.reAlloc();
-      assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
-      assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
-    } finally {
-      vector.close();
-    }
+    assertThrows(OversizedAllocationException.class, () -> {
+      final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+      // edge case 1: value count = MAX_VALUE_ALLOCATION
+      final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
+      final int expectedOffsetSize = 10;
+      try {
+        vector.allocateNew(expectedAllocationInBytes, 10);
+        assertTrue(expectedOffsetSize <= vector.getValueCapacity());
+        assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
+        vector.reAlloc();
+        assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
+        assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
+      } finally {
+        vector.close();
+      }

-    // common: value count < MAX_VALUE_ALLOCATION
-    try {
-      vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
-      vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
-      vector.reAlloc(); // this tests if it overflows
-    } finally {
-      vector.close();
-    }
+      // common: value count < MAX_VALUE_ALLOCATION
+      try {
+        vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
+        vector.reAlloc(); // value allocation reaches MAX_VALUE_ALLOCATION
+        vector.reAlloc(); // this tests if it overflows
+      } finally {
+        vector.close();
+      }
+    });
   }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java b/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java
index 2b9f4cca8c22f..bf4cda6b4271a 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java
@@ -17,8 +17,8 @@

 package org.apache.arrow.vector;

-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;

 import java.time.Duration;
 import java.time.LocalDate;
@@ -26,7 +26,7 @@
 import java.time.Period;
 import java.time.temporal.ChronoUnit;

-import org.junit.Test;
+import org.junit.jupiter.api.Test;

 public class TestPeriodDuration {
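
Across these files the JUnit 5 migration is mechanical. A compact before/after reference, summarizing exactly the swaps visible in these hunks:

    // imports
    import org.junit.Before;            // ->  import org.junit.jupiter.api.BeforeEach;
    import org.junit.After;             // ->  import org.junit.jupiter.api.AfterEach;
    import org.junit.Test;              // ->  import org.junit.jupiter.api.Test;
    import static org.junit.Assert.*;   // ->  import static org.junit.jupiter.api.Assertions.*;

    // the failure message moves from the first to the last argument
    assertEquals("message", expected, actual);   // JUnit 4
    assertEquals(expected, actual, "message");   // JUnit 5
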
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
index 396f5665e0382..fece93de9bf14 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
@@ -17,10 +17,11 @@

 package org.apache.arrow.vector;

-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
@@ -38,20 +39,19 @@
 import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;

 public class TestSplitAndTransfer {
   private BufferAllocator allocator;

-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Long.MAX_VALUE);
   }

-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -67,6 +67,18 @@ private void populateVarcharVector(final VarCharVector vector, int valueCount, S
     vector.setValueCount(valueCount);
   }

+  private void populateBaseVariableWidthViewVector(final BaseVariableWidthViewVector vector, int valueCount,
+      String[] compareArray) {
+    for (int i = 0; i < valueCount; i += 3) {
+      final String s = String.format("%010d", i);
+      vector.set(i, s.getBytes(StandardCharsets.UTF_8));
+      if (compareArray != null) {
+        compareArray[i] = s;
+      }
+    }
+    vector.setValueCount(valueCount);
+  }
+
   private void populateIntVector(final IntVector vector, int valueCount) {
     for (int i = 0; i < valueCount; i++) {
       vector.set(i, i);
@@ -109,6 +121,16 @@ public void testWithEmptyVector() {
     transferPair = varCharVector.getTransferPair(allocator);
     transferPair.splitAndTransfer(0, 0);
     assertEquals(0, transferPair.getTo().getValueCount());
+    // BaseVariableWidthViewVector: ViewVarCharVector
+    ViewVarCharVector viewVarCharVector = new ViewVarCharVector("", allocator);
+    transferPair = viewVarCharVector.getTransferPair(allocator);
+    transferPair.splitAndTransfer(0, 0);
+    assertEquals(0, transferPair.getTo().getValueCount());
+    // BaseVariableWidthViewVector: ViewVarBinaryVector
+    ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("", allocator);
+    transferPair = viewVarBinaryVector.getTransferPair(allocator);
+    transferPair.splitAndTransfer(0, 0);
+    assertEquals(0, transferPair.getTo().getValueCount());
     // BaseLargeVariableWidthVector
     LargeVarCharVector largeVarCharVector = new LargeVarCharVector("", allocator);
     transferPair = largeVarCharVector.getTransferPair(allocator);
@@ -209,6 +231,49 @@ public void test() throws Exception {
     }
   }

+  private void testView(BaseVariableWidthViewVector vector) {
+    vector.allocateNew(10000, 1000);
+    final int valueCount = 500;
+    final String[] compareArray = new String[valueCount];
+
+    populateBaseVariableWidthViewVector(vector, valueCount, compareArray);
+
+    final TransferPair tp = vector.getTransferPair(allocator);
+    final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo();
+    final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}};
+
+    for (final int[]
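
For orientation, the TransferPair flow these view tests exercise, as a standalone sketch (hypothetical values; the cast mirrors the line above):

    try (ViewVarCharVector source = new ViewVarCharVector("src", allocator)) {
      source.allocateNew(10000, 1000);           // data bytes, value count
      source.set(0, "a-long-enough-string".getBytes(StandardCharsets.UTF_8));
      source.setValueCount(500);

      TransferPair tp = source.getTransferPair(allocator);
      tp.splitAndTransfer(0, 100);               // rows [0, 100) move to the target
      try (ViewVarCharVector target = (ViewVarCharVector) tp.getTo()) {
        assertEquals(100, target.getValueCount());
      }
    }
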
startLength : startLengths) { + final int start = startLength[0]; + final int length = startLength[1]; + tp.splitAndTransfer(start, length); + for (int i = 0; i < length; i++) { + final boolean expectedSet = ((start + i) % 3) == 0; + if (expectedSet) { + final byte[] expectedValue = compareArray[start + i].getBytes(StandardCharsets.UTF_8); + assertFalse(newVector.isNull(i)); + assertArrayEquals(expectedValue, newVector.get(i)); + } else { + assertTrue(newVector.isNull(i)); + } + } + newVector.clear(); + } + } + + @Test + public void testUtf8View() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testView(viewVarCharVector); + } + } + + @Test + public void testBinaryView() throws Exception { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testView(viewVarBinaryVector); + } + } + @Test public void testMemoryConstrainedTransfer() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { @@ -233,6 +298,50 @@ public void testMemoryConstrainedTransfer() { } } + private void testMemoryConstrainedTransferInViews(BaseVariableWidthViewVector vector) { + // Here we have the target vector being transferred with a long string + // hence, the data buffer will be allocated. + // The default data buffer allocation takes + // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE + // set limit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * + // BaseVariableWidthViewVector.ELEMENT_SIZE + final int setLimit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * + BaseVariableWidthViewVector.ELEMENT_SIZE; + allocator.setLimit(setLimit); + + vector.allocateNew(16000, 1000); + + final int valueCount = 1000; + + populateBaseVariableWidthViewVector(vector, valueCount, null); + + final TransferPair tp = vector.getTransferPair(allocator); + final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); + + final int[][] startLengths = {{0, 700}, {700, 299}}; + + for (final int[] startLength : startLengths) { + final int start = startLength[0]; + final int length = startLength[1]; + tp.splitAndTransfer(start, length); + newVector.clear(); + } + } + + @Test + public void testMemoryConstrainedTransferInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testMemoryConstrainedTransferInViews(viewVarCharVector); + } + } + + @Test + public void testMemoryConstrainedTransferInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testMemoryConstrainedTransferInViews(viewVarBinaryVector); + } + } + @Test public void testTransfer() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { @@ -264,6 +373,48 @@ public void testTransfer() { } } + private void testTransferInViews(BaseVariableWidthViewVector vector) { + vector.allocateNew(16000, 1000); + + final int valueCount = 500; + final String[] compareArray = new String[valueCount]; + populateBaseVariableWidthViewVector(vector, valueCount, compareArray); + + final TransferPair tp = vector.getTransferPair(allocator); + final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); + tp.transfer(); + + assertEquals(0, vector.valueCount); + assertEquals(valueCount, newVector.valueCount); + + for (int i = 0; i < valueCount; i++) { + final boolean expectedSet = (i % 
3) == 0; + if (expectedSet) { + final byte[] expectedValue = compareArray[i].getBytes(StandardCharsets.UTF_8); + assertFalse(newVector.isNull(i)); + assertArrayEquals(expectedValue, newVector.get(i)); + } else { + assertTrue(newVector.isNull(i)); + } + } + + newVector.clear(); + } + + @Test + public void testTransferInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testTransferInViews(viewVarCharVector); + } + } + + @Test + public void testTransferInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testTransferInViews(viewVarBinaryVector); + } + } + @Test public void testCopyValueSafe() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); @@ -312,6 +463,34 @@ public void testSplitAndTransferNon() { } } + private void testSplitAndTransferNonInViews(BaseVariableWidthViewVector vector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); + + final TransferPair tp = vector.getTransferPair(allocator); + BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); + + tp.splitAndTransfer(0, 0); + assertEquals(0, newVector.getValueCount()); + + newVector.clear(); + } + + @Test + public void testSplitAndTransferNonInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testSplitAndTransferNonInViews(viewVarCharVector); + } + } + + @Test + public void testSplitAndTransferNonInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testSplitAndTransferNonInViews(viewVarBinaryVector); + } + } + @Test public void testSplitAndTransferAll() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { @@ -330,6 +509,34 @@ public void testSplitAndTransferAll() { } } + private void testSplitAndTransferAllInViews(BaseVariableWidthViewVector vector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); + + final TransferPair tp = vector.getTransferPair(allocator); + BaseVariableWidthViewVector newViewVarCharVector = (BaseVariableWidthViewVector) tp.getTo(); + + tp.splitAndTransfer(0, valueCount); + assertEquals(valueCount, newViewVarCharVector.getValueCount()); + + newViewVarCharVector.clear(); + } + + @Test + public void testSplitAndTransferAllInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testSplitAndTransferAllInViews(viewVarCharVector); + } + } + + @Test + public void testSplitAndTransferAllInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testSplitAndTransferAllInViews(viewVarBinaryVector); + } + } + @Test public void testInvalidStartIndex() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); @@ -341,7 +548,7 @@ public void testInvalidStartIndex() { final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - IllegalArgumentException e = Assertions.assertThrows( + IllegalArgumentException e = assertThrows( IllegalArgumentException.class, () -> tp.splitAndTransfer(valueCount, 10)); @@ -351,6 +558,38 @@ public void testInvalidStartIndex() { } } + private void testInvalidStartIndexInViews(BaseVariableWidthViewVector 
vector, BaseVariableWidthViewVector newVector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); + + final TransferPair tp = vector.makeTransferPair(newVector); + + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> tp.splitAndTransfer(valueCount, 10)); + + assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage()); + + newVector.clear(); + } + + @Test + public void testInvalidStartIndexInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testInvalidStartIndexInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testInvalidStartIndexInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testInvalidStartIndexInViews(viewVarBinaryVector, newViewVarBinaryVector); + } + } + @Test public void testInvalidLength() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); @@ -362,7 +601,7 @@ public void testInvalidLength() { final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - IllegalArgumentException e = Assertions.assertThrows( + IllegalArgumentException e = assertThrows( IllegalArgumentException.class, () -> tp.splitAndTransfer(0, valueCount * 2)); @@ -372,6 +611,38 @@ public void testInvalidLength() { } } + private void testInvalidLengthInViews(BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); + + final TransferPair tp = vector.makeTransferPair(newVector); + + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> tp.splitAndTransfer(0, valueCount * 2)); + + assertEquals("Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage()); + + newVector.clear(); + } + + @Test + public void testInvalidLengthInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testInvalidLengthInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testInvalidLengthInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testInvalidLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); + } + } + @Test public void testZeroStartIndexAndLength() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); @@ -390,6 +661,36 @@ public void testZeroStartIndexAndLength() { } } + private void testZeroStartIndexAndLengthInViews(BaseVariableWidthViewVector vector, + BaseVariableWidthViewVector newVector) { + vector.allocateNew(0, 0); + final int valueCount = 0; + populateBaseVariableWidthViewVector(vector, valueCount, null); + + final TransferPair tp = vector.makeTransferPair(newVector); + + tp.splitAndTransfer(0, 0); + assertEquals(valueCount, newVector.getValueCount()); + + 
newVector.clear(); + } + + @Test + public void testZeroStartIndexAndLengthInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testZeroStartIndexAndLengthInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testZeroStartIndexAndLengthInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testZeroStartIndexAndLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); + } + } + @Test public void testZeroLength() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); @@ -408,6 +709,35 @@ public void testZeroLength() { } } + private void testZeroLengthInViews(BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); + + final TransferPair tp = vector.makeTransferPair(newVector); + + tp.splitAndTransfer(500, 0); + assertEquals(0, newVector.getValueCount()); + + newVector.clear(); + } + + @Test + public void testZeroLengthInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testZeroLengthInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testZeroLengthInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testZeroLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); + } + } + @Test public void testUnionVectorZeroStartIndexAndLength() { try (final UnionVector unionVector = UnionVector.empty("myvector", allocator); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index 68f5e14dabb9b..ccb2890863314 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -17,7 +17,11 @@ package org.apache.arrow.vector; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; import java.util.ArrayList; import java.util.HashMap; @@ -39,21 +43,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestStructVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void 
terminate() throws Exception { allocator.close(); } @@ -64,7 +67,7 @@ public void testFieldMetadata() throws Exception { metadata.put("k1", "v1"); FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata); try (StructVector vector = new StructVector("struct", allocator, type, null)) { - Assert.assertEquals(vector.getField().getMetadata(), type.getMetadata()); + assertEquals(vector.getField().getMetadata(), type.getMetadata()); } } @@ -108,8 +111,8 @@ public void testAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity); - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java index 97930f433d301..5a58133f2e2bd 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java @@ -17,82 +17,158 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import java.util.Random; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.UnionMode; import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestTypeLayout { + private BufferAllocator allocator; + + @BeforeEach + public void prepare() { + allocator = new RootAllocator(Integer.MAX_VALUE); + } + + @AfterEach + public void shutdown() { + allocator.close(); + } + + @Test public void testTypeBufferCount() { ArrowType type = new ArrowType.Int(8, true); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Union(UnionMode.Sparse, new int[2]); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Union(UnionMode.Dense, new int[1]); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Struct(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + 
assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.List(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FixedSizeList(5); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Map(false); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Decimal(10, 10, 128); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Decimal(10, 10, 256); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FixedSizeBinary(5); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Bool(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Binary(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Utf8(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Null(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Date(DateUnit.DAY); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new 
ArrowType.Time(TimeUnit.MILLISECOND, 32); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Interval(IntervalUnit.DAY_TIME); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Duration(TimeUnit.MILLISECOND); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + } + + private String generateRandomString(int length) { + Random random = new Random(); + StringBuilder sb = new StringBuilder(length); + for (int i = 0; i < length; i++) { + sb.append(random.nextInt(10)); // 0-9 + } + return sb.toString(); + } + + @Test + public void testTypeBufferCountInVectorsWithVariadicBuffers() { + // empty vector + try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + ArrowType type = viewVarCharVector.getMinorType().getType(); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + } + // vector with long strings + try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(32, 6); + + viewVarCharVector.setSafe(0, generateRandomString(8).getBytes()); + viewVarCharVector.setSafe(1, generateRandomString(12).getBytes()); + viewVarCharVector.setSafe(2, generateRandomString(14).getBytes()); + viewVarCharVector.setSafe(3, generateRandomString(18).getBytes()); + viewVarCharVector.setSafe(4, generateRandomString(22).getBytes()); + viewVarCharVector.setSafe(5, generateRandomString(24).getBytes()); + + viewVarCharVector.setValueCount(6); + + ArrowType type = viewVarCharVector.getMinorType().getType(); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java index 1b0387feb73ff..10298112ddc98 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java @@ -17,11 +17,11 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.HashMap; @@ -44,21 +44,21 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public 
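
Every assertion in testTypeBufferCount above checks the same invariant, so it could be factored into a single helper; a sketch with a hypothetical name:

    private static void assertBufferCountMatchesLayout(ArrowType type) {
      // the scalar buffer count must agree with the full layout description
      assertEquals(TypeLayout.getTypeBufferCount(type),
          TypeLayout.getTypeLayout(type).getBufferLayouts().size());
    }
    // usage: assertBufferCountMatchesLayout(new ArrowType.Bool());
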
class TestUnionVector {
   private static final String EMPTY_SCHEMA_PATH = "";

   private BufferAllocator allocator;

-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }

-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -283,8 +283,8 @@ public void testSplitAndTransfer() throws Exception {

         /* check the toVector output after doing the splitAndTransfer */
         for (int i = 0; i < length; i++) {
-          assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceVector.getObject(start + i),
-              toVector.getObject(i));
+          assertEquals(sourceVector.getObject(start + i), toVector.getObject(i),
+              "Different data at indexes: " + (start + i) + " and " + i);
         }
       }
     }
@@ -373,7 +373,8 @@ public void testSplitAndTransferWithMixedVectors() throws Exception {

         /* check the toVector output after doing the splitAndTransfer */
         for (int i = 0; i < length; i++) {
-          assertEquals("Different values at index: " + i, sourceVector.getObject(start + i), toVector.getObject(i));
+          assertEquals(sourceVector.getObject(start + i), toVector.getObject(i),
+              "Different values at index: " + i);
         }
       }
     }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
index 7e64dd3864636..be83e573c7c46 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
@@ -29,11 +29,21 @@ public static VarCharVector newVarCharVector(String name, BufferAllocator alloca
         FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector(name, allocator, null);
   }

+  public static ViewVarCharVector newViewVarCharVector(String name, BufferAllocator allocator) {
+    return (ViewVarCharVector)
+        FieldType.nullable(new ArrowType.Utf8View()).createNewSingleVector(name, allocator, null);
+  }
+
   public static VarBinaryVector newVarBinaryVector(String name, BufferAllocator allocator) {
     return (VarBinaryVector)
         FieldType.nullable(new ArrowType.Binary()).createNewSingleVector(name, allocator, null);
   }

+  public static ViewVarBinaryVector newViewVarBinaryVector(String name, BufferAllocator allocator) {
+    return (ViewVarBinaryVector)
+        FieldType.nullable(new ArrowType.BinaryView()).createNewSingleVector(name, allocator, null);
+  }
+
   public static <T> T newVector(Class<T> c, String name, ArrowType type, BufferAllocator allocator) {
     return c.cast(FieldType.nullable(type).createNewSingleVector(name, allocator, null));
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
index 3e53512f7338f..b0d316070a335 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
@@ -21,12 +21,13 @@
 import static org.apache.arrow.vector.TestUtils.newVarCharVector;
 import static org.apache.arrow.vector.TestUtils.newVector;
 import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static
org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.ByteBuffer; import java.nio.charset.Charset; @@ -69,9 +70,9 @@ import org.apache.arrow.vector.util.ReusableByteArray; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestValueVector { @@ -79,7 +80,7 @@ public class TestValueVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } @@ -95,7 +96,7 @@ public void init() { (int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7); private static final int MAX_VALUE_COUNT_8BYTE = (int) (MAX_VALUE_COUNT / 2); - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -188,7 +189,7 @@ public void testFixedType1() { /* vector data should have been zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { // TODO: test vector.get(i) is 0 after unsafe get added - assertEquals("non-zero data not expected at index: " + i, true, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -276,7 +277,7 @@ public void testFixedType2() { /* check vector contents */ j = 1; for (int i = 0; i < initialCapacity; i += 2) { - assertEquals("unexpected value at index: " + i, j, intVector.get(i)); + assertEquals(j, intVector.get(i), "unexpected value at index: " + i); j++; } @@ -298,7 +299,7 @@ public void testFixedType2() { /* vector data should still be intact after realloc */ j = 1; for (int i = 0; i <= initialCapacity; i += 2) { - assertEquals("unexpected value at index: " + i, j, intVector.get(i)); + assertEquals(j, intVector.get(i), "unexpected value at index: " + i); j++; } @@ -311,7 +312,7 @@ public void testFixedType2() { /* vector data should have been zeroed out */ for (int i = 0; i < capacityBeforeRealloc; i++) { - assertEquals("non-zero data not expected at index: " + i, true, intVector.isNull(i)); + assertTrue(intVector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -427,7 +428,7 @@ public void testFixedFloat2() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -526,7 +527,7 @@ public void testFixedFloat2WithPossibleTruncate() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -626,7 +627,7 @@ public void testFixedType3() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i)); + assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -724,7 +725,7 @@ public void testFixedType4() { /* vector data should be zeroed out */ for (int i = 0; 
i < capacityBeforeReset; i++) { - assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i)); + assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -821,7 +822,7 @@ public void testNullableFixedType1() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } } } @@ -920,7 +921,7 @@ public void testNullableFixedType2() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } } } @@ -949,10 +950,10 @@ public void testNullableFixedType3() { int j = 1; for (int i = 0; i <= 1023; i++) { if ((i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } else { - assertFalse("null data not expected at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, j, vector.get(i)); + assertFalse(vector.isNull(i), "null data not expected at index: " + i); + assertEquals(j, vector.get(i), "unexpected value at index: " + i); j++; } } @@ -987,10 +988,10 @@ public void testNullableFixedType3() { j = 1; for (int i = 0; i < (initialCapacity * 2); i++) { if ((i > 1023 && i != initialCapacity) || (i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } else { - assertFalse("null data not expected at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, j, vector.get(i)); + assertFalse(vector.isNull(i), "null data not expected at index: " + i); + assertEquals(j, vector.get(i), "unexpected value at index: " + i); j++; } } @@ -1004,13 +1005,13 @@ public void testNullableFixedType3() { /* vector data should have been zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } vector.allocateNew(initialCapacity * 4); // vector has been erased for (int i = 0; i < initialCapacity * 4; i++) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } } } @@ -1036,10 +1037,10 @@ public void testNullableFixedType4() { for (int i = 0; i < valueCapacity; i++) { if ((i & 1) == 1) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals((baseValue + i), vector.get(i), "unexpected value at index: " + i); } else { - assertTrue("unexpected non-null value at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); } } @@ -1048,15 +1049,15 @@ public void testNullableFixedType4() { for (int i = 0; i < vector.getValueCapacity(); i++) { if (i == valueCapacity) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at 
index: " + i, 20000000, vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals(20000000, vector.get(i), "unexpected value at index: " + i); } else if (i < valueCapacity) { if ((i & 1) == 1) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals((baseValue + i), vector.get(i), "unexpected value at index: " + i); } } else { - assertTrue("unexpected non-null value at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); } } @@ -1068,10 +1069,10 @@ public void testNullableFixedType4() { for (int i = 0; i < vector.getValueCapacity(); i++) { if (i % 2 == 0) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals((baseValue + i), vector.get(i), "unexpected value at index: " + i); } else { - assertTrue("unexpected non-null value at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); } } @@ -1081,13 +1082,13 @@ public void testNullableFixedType4() { for (int i = 0; i < vector.getValueCapacity(); i++) { if (i == (valueCapacityBeforeRealloc + 1000)) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, 400000000, vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals(400000000, vector.get(i), "unexpected value at index: " + i); } else if (i < valueCapacityBeforeRealloc && (i % 2) == 0) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, baseValue + i, vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals(baseValue + i, vector.get(i), "unexpected value at index: " + i); } else { - assertTrue("unexpected non-null value at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); } } @@ -1100,7 +1101,7 @@ public void testNullableFixedType4() { /* vector data should be zeroed out */ for (int i = 0; i < valueCapacityBeforeReset; i++) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } } } @@ -1347,23 +1348,24 @@ public void testNullableVarType2() { } } - @Test(expected = OversizedAllocationException.class) + @Test public void testReallocateCheckSuccess() { + assertThrows(OversizedAllocationException.class, () -> { + // Create a new value vector for 1024 integers. + try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(1024 * 10, 1024); - // Create a new value vector for 1024 integers. - try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(1024 * 10, 1024); + vector.set(0, STR1); + // Check the sample strings. + assertArrayEquals(STR1, vector.get(0)); - vector.set(0, STR1); - // Check the sample strings. 
- assertArrayEquals(STR1, vector.get(0)); + // update the index offset to a larger one + ArrowBuf offsetBuf = vector.getOffsetBuffer(); + offsetBuf.setInt(VarBinaryVector.OFFSET_WIDTH, Integer.MAX_VALUE - 5); - // update the index offset to a larger one - ArrowBuf offsetBuf = vector.getOffsetBuffer(); - offsetBuf.setInt(VarBinaryVector.OFFSET_WIDTH, Integer.MAX_VALUE - 5); - - vector.setValueLengthSafe(1, 6); - } + vector.setValueLengthSafe(1, 6); + } + }); } @Test @@ -1551,9 +1553,9 @@ public void testReallocAfterVectorTransfer2() { /* check toVector contents before realloc */ for (int i = 0; i < toVector.getValueCapacity(); i++) { - assertFalse("unexpected null value at index: " + i, toVector.isNull(i)); + assertFalse(toVector.isNull(i), "unexpected null value at index: " + i); double value = toVector.get(i); - assertEquals("unexpected value at index: " + i, baseValue + (double) i, value, 0); + assertEquals(baseValue + (double) i, value, 0, "unexpected value at index: " + i); } /* now let's realloc the toVector and check contents again */ @@ -1562,11 +1564,11 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < toVector.getValueCapacity(); i++) { if (i < capacityAfterRealloc2) { - assertFalse("unexpected null value at index: " + i, toVector.isNull(i)); + assertFalse(toVector.isNull(i), "unexpected null value at index: " + i); double value = toVector.get(i); - assertEquals("unexpected value at index: " + i, baseValue + (double) i, value, 0); + assertEquals(baseValue + (double) i, value, 0, "unexpected value at index: " + i); } else { - assertTrue("unexpected non-null value at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected non-null value at index: " + i); } } @@ -1921,7 +1923,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString()); + assertEquals(Integer.toString(i), vector.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -1935,7 +1937,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -1948,7 +1950,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } } @@ -1982,7 +1984,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString()); + assertEquals(Integer.toString(i), vector.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -2000,7 +2002,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -2013,7 +2015,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - 
assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } } @@ -3021,16 +3023,18 @@ public void testUnionVectorEquals() { } } - @Test(expected = IllegalArgumentException.class) + @Test public void testEqualsWithIndexOutOfRange() { - try (final IntVector vector1 = new IntVector("int", allocator); - final IntVector vector2 = new IntVector("int", allocator)) { + assertThrows(IllegalArgumentException.class, () -> { + try (final IntVector vector1 = new IntVector("int", allocator); + final IntVector vector2 = new IntVector("int", allocator)) { - setVector(vector1, 1, 2); - setVector(vector2, 1, 2); + setVector(vector1, 1, 2); + setVector(vector2, 1, 2); - assertTrue(new RangeEqualsVisitor(vector1, vector2).rangeEquals(new Range(2, 3, 1))); - } + assertTrue(new RangeEqualsVisitor(vector1, vector2).rangeEquals(new Range(2, 3, 1))); + } + }); } @Test @@ -3398,4 +3402,99 @@ public void testSetGetUInt4() { assertEquals(expected, vector.getValueAsLong(1)); } } + + @Test + public void testSplitAndTransferFixedWithVector1() { + RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); + try (BufferAllocator child = allocator.newChildAllocator("child", 0, Long.MAX_VALUE)) { + try (IntVector vector = new IntVector("vector", child)) { + vector.setSafe(0, 1); + vector.setSafe(1, 2); + vector.setSafe(2, 3); + vector.setValueCount(3); + + TransferPair transferPair = vector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 1); + try (IntVector target = (IntVector) transferPair.getTo()) { + // no-op try-with-resource + assertEquals(1, target.get(0)); + } + } + } + } + + @Test + public void testSplitAndTransferFixedWithVector2() { + IntVector target; + try (BufferAllocator child = allocator.newChildAllocator("child", 0, Long.MAX_VALUE)) { + try (IntVector vector = new IntVector("source", child)) { + vector.setSafe(0, 1); + vector.setSafe(1, 2); + vector.setSafe(2, 3); + vector.setValueCount(3); + + TransferPair transferPair = vector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 1); + target = (IntVector) transferPair.getTo(); + assertEquals(1, target.get(0)); + } + } + target.close(); + } + + @Test + public void testVectorLoadUnloadOnNonVariadicVectors() { + + try (final IntVector vector1 = new IntVector("myvector", allocator)) { + + setVector(vector1, 1, 2, 3, 4, 5, 6); + vector1.setValueCount(15); + + /* Check the vector output */ + assertEquals(1, vector1.get(0)); + assertEquals(2, vector1.get(1)); + assertEquals(3, vector1.get(2)); + assertEquals(4, vector1.get(3)); + assertEquals(5, vector1.get(4)); + assertEquals(6, vector1.get(5)); + + Field field = vector1.getField(); + String fieldName = field.getName(); + + List fields = new ArrayList<>(); + List fieldVectors = new ArrayList<>(); + + fields.add(field); + fieldVectors.add(vector1); + + Schema schema = new Schema(fields); + + VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); + VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); + VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + + // validating recordBatch doesn't contain an output for variadicBufferCounts + 
assertTrue(recordBatch.getVariadicBufferCounts().isEmpty()); + + VectorLoader vectorLoader = new VectorLoader(schemaRoot2); + vectorLoader.load(recordBatch); + + IntVector vector2 = (IntVector) schemaRoot2.getVector(fieldName); + vector2.setValueCount(25); + + /* Check the vector output */ + assertEquals(1, vector2.get(0)); + assertEquals(2, vector2.get(1)); + assertEquals(3, vector2.get(2)); + assertEquals(4, vector2.get(3)); + assertEquals(5, vector2.get(4)); + assertEquals(6, vector2.get(5)); + } + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java index bfe489fa5af4e..6d4e64837adbc 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java @@ -17,6 +17,8 @@ package org.apache.arrow.vector; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.nio.charset.StandardCharsets; import org.apache.arrow.memory.ArrowBuf; @@ -25,21 +27,20 @@ import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVarCharListVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -72,8 +73,8 @@ public void testVarCharListWithNulls() { writer.setValueCount(2); - Assert.assertEquals(2, vector.getValueCount()); - Assert.assertEquals(2, vector.getDataVector().getValueCount()); + assertEquals(2, vector.getValueCount()); + assertEquals(2, vector.getDataVector().getValueCount()); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java new file mode 100644 index 0000000000000..ebf9b58da7b40 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java @@ -0,0 +1,2427 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.vector; + +import static org.apache.arrow.vector.TestUtils.newVector; +import static org.apache.arrow.vector.TestUtils.newViewVarBinaryVector; +import static org.apache.arrow.vector.TestUtils.newViewVarCharVector; +import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.Random; +import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.memory.rounding.DefaultRoundingPolicy; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; +import org.apache.arrow.vector.testing.ValueVectorDataPopulator; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.ReusableByteArray; +import org.apache.arrow.vector.util.Text; +import org.apache.arrow.vector.util.TransferPair; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + + +public class TestVarCharViewVector { + + // short string (length <= 12) + private static final byte[] STR0 = "0123456".getBytes(StandardCharsets.UTF_8); + // short string (length <= 12) + private static final byte[] STR1 = "012345678912".getBytes(StandardCharsets.UTF_8); + // long string (length > 12) + private static final byte[] STR2 = "0123456789123".getBytes(StandardCharsets.UTF_8); + // long string (length > 12) + private static final byte[] STR3 = "01234567891234567".getBytes(StandardCharsets.UTF_8); + // short string (length <= 12) + private static final byte[] STR4 = "01234567".getBytes(StandardCharsets.UTF_8); + // short string (length <= 12) + private static final byte[] STR5 = "A1234A".getBytes(StandardCharsets.UTF_8); + // short string (length <= 12) + private static final byte[] STR6 = "B1234567B".getBytes(StandardCharsets.UTF_8); + // long string (length > 12) + private static final byte[] STR7 = "K01234567891234567K".getBytes(StandardCharsets.UTF_8); + // long string (length > 12) + private static final byte[] STR8 = "M012345678912345678M".getBytes(StandardCharsets.UTF_8); + private static final String EMPTY_SCHEMA_PATH = ""; + + private BufferAllocator allocator; + + private Random random; + + @BeforeEach + public void prepare() { + allocator = new RootAllocator(Integer.MAX_VALUE); + random = new Random(); + } + + @AfterEach 
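
The short/long split in the STR0..STR8 constants above follows the Arrow binary-view layout: each view occupies a fixed 16 bytes, so after the 4-byte length a value of at most 12 bytes is stored inline in the view itself, while anything longer keeps only a 4-byte prefix plus a buffer index and offset into a shared data buffer. A sketch of the boundary these tests probe:

    // 16-byte view = 4-byte length + 12 inline bytes (else prefix + buffer id + offset)
    static boolean storedInline(byte[] value) {
      return value.length <= 12;
    }
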
+  public void shutdown() {
+    allocator.close();
+  }
+
+  public static void setBytes(int index, byte[] bytes, ViewVarCharVector vector) {
+    BitVectorHelper.setBit(vector.validityBuffer, index);
+    vector.setBytes(index, bytes, 0, bytes.length);
+  }
+
+  @Test
+  public void testInlineAllocation() {
+    try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
+      viewVarCharVector.allocateNew(48, 3);
+      final int valueCount = 3;
+      viewVarCharVector.set(0, STR0);
+      viewVarCharVector.set(1, STR1);
+      viewVarCharVector.set(2, STR4);
+      viewVarCharVector.setValueCount(valueCount);
+
+      byte[] view1 = viewVarCharVector.get(0);
+      byte[] view2 = viewVarCharVector.get(1);
+      byte[] view3 = viewVarCharVector.get(2);
+
+      assertNotNull(view1);
+      assertNotNull(view2);
+      assertNotNull(view3);
+
+      String str1 = new String(STR0, StandardCharsets.UTF_8);
+      String str2 = new String(STR1, StandardCharsets.UTF_8);
+      String str3 = new String(STR4, StandardCharsets.UTF_8);
+
+      assertEquals(new String(view1, StandardCharsets.UTF_8), str1);
+      assertEquals(new String(view2, StandardCharsets.UTF_8), str2);
+      assertEquals(new String(view3, StandardCharsets.UTF_8), str3);
+
+      assertTrue(viewVarCharVector.dataBuffers.isEmpty());
+
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(),
+          StandardCharsets.UTF_8), str1);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(),
+          StandardCharsets.UTF_8), str2);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(),
+          StandardCharsets.UTF_8), str3);
+    }
+  }
+
+  @Test
+  public void testDataBufferBasedAllocationInSameBuffer() {
+    try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
+      viewVarCharVector.allocateNew(48, 4);
+      final int valueCount = 4;
+      String str4 = generateRandomString(34);
+      viewVarCharVector.set(0, STR1);
+      viewVarCharVector.set(1, STR2);
+      viewVarCharVector.set(2, STR3);
+      viewVarCharVector.set(3, str4.getBytes(StandardCharsets.UTF_8));
+      viewVarCharVector.setValueCount(valueCount);
+
+      byte[] view1 = viewVarCharVector.get(0);
+      byte[] view2 = viewVarCharVector.get(1);
+      byte[] view3 = viewVarCharVector.get(2);
+      byte[] view4 = viewVarCharVector.get(3);
+
+      assertNotNull(view1);
+      assertNotNull(view2);
+      assertNotNull(view3);
+      assertNotNull(view4);
+
+      String str1 = new String(STR1, StandardCharsets.UTF_8);
+      String str2 = new String(STR2, StandardCharsets.UTF_8);
+      String str3 = new String(STR3, StandardCharsets.UTF_8);
+
+      assertEquals(new String(view1, StandardCharsets.UTF_8), str1);
+      assertEquals(new String(view2, StandardCharsets.UTF_8), str2);
+      assertEquals(new String(view3, StandardCharsets.UTF_8), str3);
+      assertEquals(new String(view4, StandardCharsets.UTF_8), str4);
+
+      assertEquals(1, viewVarCharVector.dataBuffers.size());
+
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(),
+          StandardCharsets.UTF_8), str1);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(),
+          StandardCharsets.UTF_8), str2);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(),
+          StandardCharsets.UTF_8), str3);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(3)).getBuffer(),
+          StandardCharsets.UTF_8), str4);
+    }
+  }
+
+  @Test
+  public void testDataBufferBasedAllocationInOtherBuffer() {
+    try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
+      viewVarCharVector.allocateNew(48, 4);
+      final int valueCount = 4;
+      String str4 = generateRandomString(35);
+      viewVarCharVector.set(0, STR1);
+      viewVarCharVector.set(1, STR2);
+      viewVarCharVector.set(2, STR3);
+      viewVarCharVector.set(3, str4.getBytes(StandardCharsets.UTF_8));
+      viewVarCharVector.setValueCount(valueCount);
+
+      byte[] view1 = viewVarCharVector.get(0);
+      byte[] view2 = viewVarCharVector.get(1);
+      byte[] view3 = viewVarCharVector.get(2);
+      byte[] view4 = viewVarCharVector.get(3);
+
+      assertNotNull(view1);
+      assertNotNull(view2);
+      assertNotNull(view3);
+      assertNotNull(view4);
+
+      String str1 = new String(STR1, StandardCharsets.UTF_8);
+      String str2 = new String(STR2, StandardCharsets.UTF_8);
+      String str3 = new String(STR3, StandardCharsets.UTF_8);
+
+      assertEquals(new String(view1, StandardCharsets.UTF_8), str1);
+      assertEquals(new String(view2, StandardCharsets.UTF_8), str2);
+      assertEquals(new String(view3, StandardCharsets.UTF_8), str3);
+      assertEquals(new String(view4, StandardCharsets.UTF_8), str4);
+
+      assertEquals(2, viewVarCharVector.dataBuffers.size());
+
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(),
+          StandardCharsets.UTF_8), str1);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(),
+          StandardCharsets.UTF_8), str2);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(),
+          StandardCharsets.UTF_8), str3);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(3)).getBuffer(),
+          StandardCharsets.UTF_8), str4);
+    }
+  }
+
+  @Test
+  public void testMixedAllocation() {
+    try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
+      viewVarCharVector.allocateNew(128, 6);
+      final int valueCount = 6;
+      String str4 = generateRandomString(35);
+      String str6 = generateRandomString(40);
+      viewVarCharVector.set(0, STR1);
+      viewVarCharVector.set(1, STR2);
+      viewVarCharVector.set(2, STR3);
+      viewVarCharVector.set(3, str4.getBytes(StandardCharsets.UTF_8));
+      viewVarCharVector.set(4, STR1);
+      viewVarCharVector.set(5, str6.getBytes(StandardCharsets.UTF_8));
+      viewVarCharVector.setValueCount(valueCount);
+
+      byte[] view1 = viewVarCharVector.get(0);
+      byte[] view2 = viewVarCharVector.get(1);
+      byte[] view3 = viewVarCharVector.get(2);
+      byte[] view4 = viewVarCharVector.get(3);
+      byte[] view5 = viewVarCharVector.get(4);
+      byte[] view6 = viewVarCharVector.get(5);
+
+      assertNotNull(view1);
+      assertNotNull(view2);
+      assertNotNull(view3);
+      assertNotNull(view4);
+      assertNotNull(view5);
+      assertNotNull(view6);
+
+      String str1 = new String(STR1, StandardCharsets.UTF_8);
+      String str2 = new String(STR2, StandardCharsets.UTF_8);
+      String str3 = new String(STR3, StandardCharsets.UTF_8);
+
+      assertEquals(new String(view1, StandardCharsets.UTF_8), str1);
+      assertEquals(new String(view2, StandardCharsets.UTF_8), str2);
+      assertEquals(new String(view3, StandardCharsets.UTF_8), str3);
+      assertEquals(new String(view4, StandardCharsets.UTF_8), str4);
+      assertEquals(new String(view5, StandardCharsets.UTF_8), str1);
+      assertEquals(new String(view6, StandardCharsets.UTF_8), str6);
+
+      assertEquals(1, viewVarCharVector.dataBuffers.size());
+
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(),
+          StandardCharsets.UTF_8), str1);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(),
+          StandardCharsets.UTF_8), str2);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(),
+          StandardCharsets.UTF_8), str3);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(3)).getBuffer(),
+          StandardCharsets.UTF_8), str4);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(4)).getBuffer(),
+          StandardCharsets.UTF_8), str1);
+      assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(5)).getBuffer(),
+          StandardCharsets.UTF_8), str6);
+    }
+  }
+
+  @Test
+  public void testAllocationIndexOutOfBounds() {
+    assertThrows(IndexOutOfBoundsException.class, () -> {
+      try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
+        viewVarCharVector.allocateNew(32, 3);
+        final int valueCount = 3;
+        viewVarCharVector.set(0, STR1);
+        viewVarCharVector.set(1, STR2);
+        viewVarCharVector.set(2, STR2);
+        viewVarCharVector.setValueCount(valueCount);
+      }
+    });
+  }
+
+  @Test
+  public void testSizeOfViewBufferElements() {
+    try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+      int valueCount = 100;
+      int currentSize = 0;
+      vector.setInitialCapacity(valueCount);
+      vector.allocateNew();
+      vector.setValueCount(valueCount);
+      for (int i = 0; i < valueCount; i++) {
+        currentSize += i;
+        vector.setSafe(i, new byte[i]);
+      }
+      assertEquals(currentSize, vector.sizeOfViewBufferElements());
+    }
+  }
+
+  @Test
+  public void testNullableVarType1() {
+    // Create a new view varchar vector with capacity for 1024 values.
+    try (final ViewVarCharVector vector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+      vector.allocateNew(1024 * 10, 1024);
+
+      vector.set(0, STR1);
+      vector.set(1, STR2);
+      vector.set(2, STR3);
+      vector.setSafe(3, STR3, 1, STR3.length - 1);
+      vector.setSafe(4, STR3, 2, STR3.length - 2);
+      ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3);
+      vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1);
+      vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2);
+
+      // Set with convenience function
+      Text txt = new Text("foo");
+      vector.setSafe(7, txt.getBytes(), 0, (int) txt.getLength());
+
+      // Check the sample strings.
+      assertArrayEquals(STR1, vector.get(0));
+      assertArrayEquals(STR2, vector.get(1));
+      assertArrayEquals(STR3, vector.get(2));
+      assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3));
+      assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4));
+      assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5));
+      assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6));
+
+      // Check returning a Text object
+      assertEquals(txt, vector.getObject(7));
+
+      // An unset position should read back as null.
+ assertNull(vector.get(8)); + } + } + + @Test + public void testGetTextRepeatedly() { + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + ValueVectorDataPopulator.setVector(vector, STR1, STR2); + vector.setValueCount(2); + + /* check the vector output */ + Text text = new Text(); + vector.read(0, text); + assertArrayEquals(STR1, text.getBytes()); + vector.read(1, text); + assertArrayEquals(STR2, text.getBytes()); + } + } + + @Test + public void testNullableVarType2() { + try (final ViewVarBinaryVector vector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(1024 * 10, 1024); + vector.set(0, STR1); + vector.set(1, STR2); + vector.set(2, STR3); + vector.setSafe(3, STR3, 1, STR3.length - 1); + vector.setSafe(4, STR3, 2, STR3.length - 2); + ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3); + vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1); + vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2); + + // Check the sample strings. + assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(1)); + assertArrayEquals(STR3, vector.get(2)); + assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3)); + assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4)); + assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5)); + assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6)); + + // Ensure null value throws. + assertNull(vector.get(7)); + } + } + + @Test + public void testGetBytesRepeatedly() { + try (ViewVarBinaryVector vector = new ViewVarBinaryVector("", allocator)) { + vector.allocateNew(5, 1); + + final String str = "hello world!!!"; + final String str2 = "foo"; + vector.setSafe(0, str.getBytes(StandardCharsets.UTF_8)); + vector.setSafe(1, str2.getBytes(StandardCharsets.UTF_8)); + + // verify results + ReusableByteArray reusableByteArray = new ReusableByteArray(); + vector.read(0, reusableByteArray); + assertArrayEquals( + str.getBytes(StandardCharsets.UTF_8), + Arrays.copyOfRange( + reusableByteArray.getBuffer(), 0, (int) reusableByteArray.getLength())); + byte[] oldBuffer = reusableByteArray.getBuffer(); + + vector.read(1, reusableByteArray); + assertArrayEquals( + str2.getBytes(StandardCharsets.UTF_8), + Arrays.copyOfRange( + reusableByteArray.getBuffer(), 0, (int) reusableByteArray.getLength())); + + // There should not have been any reallocation since the newer value is smaller in length. 
+ assertSame(oldBuffer, reusableByteArray.getBuffer()); + } + } + + @Test + public void testReAllocVariableWidthViewVector() { + try (final ViewVarCharVector vector = newVector(ViewVarCharVector.class, EMPTY_SCHEMA_PATH, + Types.MinorType.VIEWVARCHAR, allocator)) { + final int capacityLimit = 4095; + final int overLimitIndex = 200; + vector.setInitialCapacity(capacityLimit); + vector.allocateNew(); + + int initialCapacity = vector.getValueCapacity(); + assertTrue(initialCapacity >= capacityLimit); + + /* Put values in indexes that fall within the initial allocation */ + vector.setSafe(0, STR1, 0, STR1.length); + vector.setSafe(initialCapacity - 1, STR2, 0, STR2.length); + + /* the set calls above should NOT have triggered a realloc */ + assertEquals(initialCapacity, vector.getValueCapacity()); + + /* Now try to put values in space that falls beyond the initial allocation */ + vector.setSafe(initialCapacity + overLimitIndex, STR3, 0, STR3.length); + + /* Check valueCapacity is more than initial allocation */ + assertTrue(initialCapacity * 2 <= vector.getValueCapacity()); + + assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(initialCapacity - 1)); + assertArrayEquals(STR3, vector.get(initialCapacity + overLimitIndex)); + + // Set the valueCount to be more than valueCapacity of current allocation. This is possible for ValueVectors + // as we don't call setSafe for null values, but we do call setValueCount when the current batch is processed. + vector.setValueCount(vector.getValueCapacity() + overLimitIndex); + } + } + + @Test + public void testSetSafeWithArrowBufNoExcessAllocs() { + final int numValues = BaseVariableWidthViewVector.INITIAL_VALUE_ALLOCATION * 2; + final byte[] valueBytes = "hello world!!!".getBytes(StandardCharsets.UTF_8); + final int valueBytesLength = valueBytes.length; + final int isSet = 1; + try (final ViewVarCharVector fromVector = + newVector( + ViewVarCharVector.class, + EMPTY_SCHEMA_PATH, + Types.MinorType.VIEWVARCHAR, + allocator); + final ViewVarCharVector toVector = + newVector( + ViewVarCharVector.class, + EMPTY_SCHEMA_PATH, + Types.MinorType.VIEWVARCHAR, + allocator)) { + /* + * Populate the `fromVector` with `numValues` with byte-arrays, each of size `valueBytesLength`. + */ + fromVector.setInitialCapacity(numValues); + fromVector.allocateNew(); + for (int i = 0; i < numValues; ++i) { + fromVector.setSafe(i, valueBytes, 0 /*start*/, valueBytesLength); + } + fromVector.setValueCount(numValues); + ArrowBuf fromDataBuffer = fromVector.getDataBuffer(); + assertTrue(numValues * valueBytesLength <= fromDataBuffer.capacity()); + + /* + * Copy the entries one-by-one from 'fromVector' to 'toVector', but use the setSafe with + * ArrowBuf API (instead of setSafe with byte-array). + */ + toVector.setInitialCapacity(numValues); + toVector.allocateNew(); + for (int i = 0; i < numValues; i++) { + int start = fromVector.getTotalValueLengthUpToIndex(i); + // across variable + // width implementations + int end = fromVector.getTotalValueLengthUpToIndex(i + 1); + toVector.setSafe(i, isSet, start, end, fromDataBuffer); + } + + /* + * Since the 'fromVector' and 'toVector' have the same initial capacity, and were populated + * with the same varchar elements, the allocations and hence, the final capacity should be + * the same. 
+       */
+      assertEquals(fromDataBuffer.capacity(), toVector.getDataBuffer().capacity());
+    }
+  }
+
+  @Test
+  public void testSetLastSetUsage() {
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myvector", allocator)) {
+      vector.allocateNew(1024 * 10, 1024);
+
+      setBytes(0, STR1, vector);
+      setBytes(1, STR2, vector);
+      setBytes(2, STR3, vector);
+      setBytes(3, STR4, vector);
+
+      /* Check current lastSet */
+      assertEquals(-1, vector.getLastSet());
+
+      /* Check the vector output */
+      assertArrayEquals(STR1, vector.get(0));
+      assertArrayEquals(STR2, vector.get(1));
+      assertArrayEquals(STR3, vector.get(2));
+      assertArrayEquals(STR4, vector.get(3));
+
+      /*
+       * If we don't do setLastSet(3) before setValueCount(), then the latter will corrupt
+       * the value vector by filling in all positions [0, valueCount - 1] with empty byte arrays.
+       * Run the test after commenting out the next line, and we should see incorrect vector output.
+       */
+      vector.setLastSet(3);
+      vector.setValueCount(20);
+
+      /* Check current lastSet */
+      assertEquals(19, vector.getLastSet());
+
+      /* Check the vector output again */
+      assertArrayEquals(STR1, vector.get(0));
+      assertArrayEquals(STR2, vector.get(1));
+      assertArrayEquals(STR3, vector.get(2));
+      assertArrayEquals(STR4, vector.get(3));
+
+      assertEquals(0, vector.getValueLength(4));
+      assertEquals(0, vector.getValueLength(5));
+      assertEquals(0, vector.getValueLength(6));
+      assertEquals(0, vector.getValueLength(7));
+      assertEquals(0, vector.getValueLength(8));
+      assertEquals(0, vector.getValueLength(9));
+      assertEquals(0, vector.getValueLength(10));
+      assertEquals(0, vector.getValueLength(11));
+      assertEquals(0, vector.getValueLength(12));
+      assertEquals(0, vector.getValueLength(13));
+      assertEquals(0, vector.getValueLength(14));
+      assertEquals(0, vector.getValueLength(15));
+      assertEquals(0, vector.getValueLength(16));
+      assertEquals(0, vector.getValueLength(17));
+      assertEquals(0, vector.getValueLength(18));
+      assertEquals(0, vector.getValueLength(19));
+    }
+  }
+
+  @Test
+  public void testFillEmptiesUsage() {
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myvector", allocator)) {
+      vector.allocateNew(1024 * 10, 1024);
+
+      setBytes(0, STR1, vector);
+      setBytes(1, STR2, vector);
+      setBytes(2, STR3, vector);
+      setBytes(3, STR4, vector);
+
+      /* Check current lastSet */
+      assertEquals(-1, vector.getLastSet());
+
+      /* Check the vector output */
+      assertArrayEquals(STR1, vector.get(0));
+      assertArrayEquals(STR2, vector.get(1));
+      assertArrayEquals(STR3, vector.get(2));
+      assertArrayEquals(STR4, vector.get(3));
+
+      vector.setLastSet(3);
+      /* fill empty byte arrays from index [4, 9] */
+      vector.fillEmpties(10);
+
+      /* Check current lastSet */
+      assertEquals(9, vector.getLastSet());
+
+      /* Check the vector output */
+      assertArrayEquals(STR1, vector.get(0));
+      assertArrayEquals(STR2, vector.get(1));
+      assertArrayEquals(STR3, vector.get(2));
+      assertArrayEquals(STR4, vector.get(3));
+      assertEquals(0, vector.getValueLength(4));
+      assertEquals(0, vector.getValueLength(5));
+      assertEquals(0, vector.getValueLength(6));
+      assertEquals(0, vector.getValueLength(7));
+      assertEquals(0, vector.getValueLength(8));
+      assertEquals(0, vector.getValueLength(9));
+
+      setBytes(10, STR1, vector);
+      setBytes(11, STR2, vector);
+
+      vector.setLastSet(11);
+      /* fill empty byte arrays from index [12, 14] */
+      vector.setValueCount(15);
+
+      /* Check current lastSet */
+      assertEquals(14, vector.getLastSet());
+
+      /* Check the vector output */
+      assertArrayEquals(STR1, vector.get(0));
+      assertArrayEquals(STR2, vector.get(1));
+      assertArrayEquals(STR3, vector.get(2));
+      assertArrayEquals(STR4, vector.get(3));
+      assertEquals(0, vector.getValueLength(4));
+      assertEquals(0, vector.getValueLength(5));
+      assertEquals(0, vector.getValueLength(6));
+      assertEquals(0, vector.getValueLength(7));
+      assertEquals(0, vector.getValueLength(8));
+      assertEquals(0, vector.getValueLength(9));
+      assertArrayEquals(STR1, vector.get(10));
+      assertArrayEquals(STR2, vector.get(11));
+      assertEquals(0, vector.getValueLength(12));
+      assertEquals(0, vector.getValueLength(13));
+      assertEquals(0, vector.getValueLength(14));
+    }
+  }
+
+  @Test
+  public void testGetBufferAddress1() {
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+
+      setVector(vector, STR1, STR2, STR3, STR4);
+      vector.setValueCount(15);
+
+      /* check the vector output */
+      assertArrayEquals(STR1, vector.get(0));
+      assertArrayEquals(STR2, vector.get(1));
+      assertArrayEquals(STR3, vector.get(2));
+      assertArrayEquals(STR4, vector.get(3));
+
+      List<ArrowBuf> buffers = vector.getFieldBuffers();
+      long bitAddress = vector.getValidityBufferAddress();
+      long dataAddress = vector.getDataBufferAddress();
+
+      assertEquals(3, buffers.size());
+      assertEquals(bitAddress, buffers.get(0).memoryAddress());
+      assertEquals(dataAddress, buffers.get(1).memoryAddress());
+    }
+  }
+
+  @Test
+  public void testSetInitialCapacityInViews() {
+    try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+      /* use the default 16 data bytes on average per element */
+      final int viewSize = BaseVariableWidthViewVector.ELEMENT_SIZE;
+      int defaultCapacity = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION / viewSize;
+      vector.setInitialCapacity(defaultCapacity);
+      vector.allocateNew();
+      assertEquals(defaultCapacity, vector.getValueCapacity());
+      assertEquals(CommonUtil.nextPowerOfTwo(defaultCapacity * viewSize), vector.getDataBuffer().capacity());
+
+      double density = 4.0;
+      final int valueCount = 5;
+      vector.setInitialCapacity(valueCount, density);
+      vector.allocateNew();
+      assertEquals(8, vector.getValueCapacity());
+      assertEquals(128, vector.getDataBuffer().capacity());
+      int initialDataBufferSize = (int) (valueCount * density);
+      // make sure a data buffer is allocated
+      vector.set(4, "01234567890123456".getBytes(StandardCharsets.UTF_8));
+      assertEquals(1, vector.dataBuffers.size());
+      ArrowBuf dataBuf = vector.dataBuffers.get(0);
+      try (ArrowBuf tempBuf = vector.allocator.buffer(initialDataBufferSize)) {
+        // replicate the buffer allocation that occurs when a new data buffer is added
+        // on inserting an element with length > 12
+        assertEquals(tempBuf.capacity(), dataBuf.capacity());
+      }
+    }
+  }
+
+  @Test
+  public void testGetPointerVariableWidthViews() {
+    final String[] sampleData = new String[]{
+        "abc", "1234567890123", "def", null, "hello world java", "aaaaa", "world", "2019", null, "0717"};
+
+    try (ViewVarCharVector vec1 = new ViewVarCharVector("vec1", allocator);
+        ViewVarCharVector vec2 = new ViewVarCharVector("vec2", allocator)) {
+
+      vec1.allocateNew((long) sampleData.length * 16, sampleData.length);
+      vec2.allocateNew((long) sampleData.length * 16, sampleData.length);
+
+      for (int i = 0; i < sampleData.length; i++) {
+        String str = sampleData[i];
+        if (str != null) {
+          vec1.set(i, sampleData[i].getBytes(StandardCharsets.UTF_8));
+          vec2.set(i, sampleData[i].getBytes(StandardCharsets.UTF_8));
+        } else {
+          vec1.setNull(i);
+          vec2.setNull(i);
+        }
+      }
+
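+      // ArrowBufPointer equality compares the bytes the pointers reference rather
+      // than buffer identity, so the same logical values written into two separate
+      // vectors (including null slots) should compare equal below.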
+      ArrowBufPointer ptr1 = new ArrowBufPointer();
+      ArrowBufPointer ptr2 = new ArrowBufPointer();
+
+      for (int i = 0; i < sampleData.length; i++) {
+        vec1.getDataPointer(i, ptr1);
+        vec2.getDataPointer(i, ptr2);
+
+        // check equality in both directions
+        assertTrue(ptr1.equals(ptr2));
+        assertTrue(ptr2.equals(ptr1));
+      }
+    }
+  }
+
+  @Test
+  public void testGetNullFromVariableWidthViewVector() {
+    try (final ViewVarCharVector varCharViewVector = new ViewVarCharVector("viewvarcharvec", allocator);
+        final ViewVarBinaryVector varBinaryViewVector = new ViewVarBinaryVector("viewvarbinary", allocator)) {
+      varCharViewVector.allocateNew(16, 1);
+      varBinaryViewVector.allocateNew(16, 1);
+
+      varCharViewVector.setNull(0);
+      varBinaryViewVector.setNull(0);
+
+      assertNull(varCharViewVector.get(0));
+      assertNull(varBinaryViewVector.get(0));
+    }
+  }
+
+  @Test
+  public void testVariableWidthViewVectorNullHashCode() {
+    try (ViewVarCharVector viewVarChar = new ViewVarCharVector("view var char vector", allocator)) {
+      viewVarChar.allocateNew(100, 1);
+      viewVarChar.setValueCount(1);
+
+      viewVarChar.set(0, "abc".getBytes(StandardCharsets.UTF_8));
+      viewVarChar.setNull(0);
+
+      assertEquals(0, viewVarChar.hashCode(0));
+    }
+  }
+
+  @Test
+  public void testUnloadVariableWidthViewVector() {
+    try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("view var char", allocator)) {
+      viewVarCharVector.allocateNew(16, 2);
+      viewVarCharVector.setValueCount(2);
+      viewVarCharVector.set(0, "abcd".getBytes(StandardCharsets.UTF_8));
+
+      List<ArrowBuf> bufs = viewVarCharVector.getFieldBuffers();
+      assertEquals(2, bufs.size());
+
+      ArrowBuf viewBuf = bufs.get(1);
+
+      assertEquals(32, viewBuf.writerIndex());
+      final String longString = "012345678901234";
+      viewVarCharVector.set(1, longString.getBytes(StandardCharsets.UTF_8));
+
+      bufs = viewVarCharVector.getFieldBuffers();
+      assertEquals(3, bufs.size());
+
+      ArrowBuf referenceBuf = bufs.get(2);
+      assertEquals(longString.length(), referenceBuf.writerIndex());
+    }
+  }
+
+  @Test
+  public void testUnsupportedOffset() {
+    // view vectors do not maintain an offset buffer
+    assertThrows(UnsupportedOperationException.class, () -> {
+      try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+
+        setVector(vector, STR1, STR2);
+        vector.setValueCount(2);
+
+        /* check the vector output */
+        assertArrayEquals(STR1, vector.get(0));
+        assertArrayEquals(STR2, vector.get(1));
+
+        vector.getOffsetBuffer();
+      }
+    });
+  }
+
+  /**
+   * Validate the 16-byte view at the given index: its length, its prefix and, for long
+   * values, the buffer id and offset into the corresponding data buffer.
+   */
+  private void validateViewBuffer(int index, ViewVarCharVector vector, byte[] expectedData,
+      int expectedBufId, int expectedOffset) {
+    final ArrowBuf viewBuffer = vector.viewBuffer;
+    int writePosition = index * BaseVariableWidthViewVector.ELEMENT_SIZE;
+    final int prefixBufWidth = BaseVariableWidthViewVector.PREFIX_WIDTH;
+    final int lengthBufWidth = BaseVariableWidthViewVector.LENGTH_WIDTH;
+    int length = viewBuffer.getInt(writePosition);
+
+    // validate length of the view
+    assertEquals(expectedData.length, length);
+
+    byte[] prefixBytes = new byte[prefixBufWidth];
+    viewBuffer.getBytes(writePosition + lengthBufWidth, prefixBytes);
+
+    // validate the prefix
+    byte[] expectedPrefixBytes = new byte[prefixBufWidth];
+    System.arraycopy(expectedData, 0, expectedPrefixBytes, 0, prefixBufWidth);
+    assertArrayEquals(expectedPrefixBytes, prefixBytes);
+
+    if (length > 12) {
+      // validate bufId
+      int bufId = viewBuffer.getInt(writePosition + lengthBufWidth + prefixBufWidth);
+      assertEquals(expectedBufId, bufId);
+      // validate offset
+      int offset =
+          viewBuffer.getInt(writePosition +
+              lengthBufWidth +
+              prefixBufWidth +
+              BaseVariableWidthViewVector.BUF_INDEX_WIDTH);
+      assertEquals(expectedOffset, offset);
+    }
+    // validate retrieved data
+    assertArrayEquals(expectedData, vector.get(index));
+  }
+
+  @Test
+  public void testOverwriteShortFromLongString() {
+    /* NA: not applicable */
+    // Overwriting at the beginning of the buffer.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 1);
+      // set short string
+      vector.set(0, STR0);
+      vector.setValueCount(1);
+      assertEquals(0, vector.dataBuffers.size());
+      assertArrayEquals(STR0, vector.get(0));
+
+      validateViewBuffer(0, vector, STR0, /*NA*/-1, /*NA*/-1);
+
+      // overwrite with a long string
+      vector.set(0, STR3);
+      vector.setValueCount(1);
+      assertEquals(1, vector.dataBuffers.size());
+      assertArrayEquals(STR3, vector.get(0));
+
+      validateViewBuffer(0, vector, STR3, 0, 0);
+    }
+
+    // Overwriting in the middle of the buffer when existing values are all short strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(48, 3);
+      // set short string 1
+      vector.set(0, STR0);
+      // set short string 2
+      vector.set(1, STR5);
+      // set short string 3
+      vector.set(2, STR6);
+      vector.setValueCount(3);
+
+      // overwrite index 1 with a long string
+      vector.set(1, STR7);
+      vector.setValueCount(3);
+
+      validateViewBuffer(0, vector, STR0, /*NA*/-1, /*NA*/-1);
+      validateViewBuffer(1, vector, STR7, 0, 0);
+      validateViewBuffer(2, vector, STR6, /*NA*/-1, /*NA*/-1);
+    }
+
+    // Overwriting in the middle of the buffer with a mix of short and long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(80, 5);
+      // set short string 1
+      vector.set(0, STR0);
+      // set long string 1
+      vector.set(1, STR3);
+      // set short string 2
+      vector.set(2, STR5);
+      // set short string 3
+      vector.set(3, STR6);
+      // set long string 2
+      vector.set(4, STR7);
+      vector.setValueCount(5);
+
+      // overwrite index 2 with a long string
+      vector.set(2, STR8);
+      vector.setValueCount(5);
+
+      validateViewBuffer(0, vector, STR0, /*NA*/-1, /*NA*/-1);
+      validateViewBuffer(1, vector, STR3, 0, 0);
+      // Since we overwrote index 2 with STR8 and writes are append-only,
+      // STR8 is appended to the data buffer.
+      // Thus the data buffer holds, in order, [STR3, STR7, STR8].
+      validateViewBuffer(2, vector, STR8, 0, STR3.length + STR7.length);
+      validateViewBuffer(3, vector, STR6, /*NA*/-1, /*NA*/-1);
+      validateViewBuffer(4, vector, STR7, 0, STR3.length);
+    }
+
+    // Overwriting in the middle of the buffer with a mix of short and long strings.
+    // Here a short string is overwritten with a long string whose length is larger than
+    // the remaining capacity of the existing data buffer, so a new buffer is allocated
+    // in dataBuffers.
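+    // The 128-character value used below exceeds the space left in the first data
+    // buffer (asserted via remainingCapacity), so it is expected to land in bufId 1.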
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(80, 5);
+      // set short string 1
+      vector.set(0, STR0);
+      // set long string 1
+      vector.set(1, STR3);
+      // set short string 2
+      vector.set(2, STR5);
+      // set short string 3
+      vector.set(3, STR6);
+      // set long string 2
+      vector.set(4, STR7);
+
+      vector.setValueCount(5);
+
+      // overwrite index 2 with a long string
+      String longString = generateRandomString(128);
+      byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8);
+      // Since the append-only approach is used and the remaining capacity
+      // is not enough to store the new string, a new buffer will be allocated.
+      final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
+      final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
+      assertTrue(remainingCapacity < longStringBytes.length);
+      vector.set(2, longStringBytes);
+      vector.setValueCount(5);
+
+      validateViewBuffer(0, vector, STR0, /*NA*/-1, /*NA*/-1);
+      validateViewBuffer(1, vector, STR3, 0, 0);
+      // the overwritten long string will be stored in the new data buffer.
+      validateViewBuffer(2, vector, longStringBytes, 1, 0);
+      validateViewBuffer(3, vector, STR6, /*NA*/-1, /*NA*/-1);
+      validateViewBuffer(4, vector, STR7, 0, STR3.length);
+    }
+  }
+
+  @Test
+  public void testOverwriteLongFromShortString() {
+    // Overwriting at the beginning of the buffer.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 1);
+      // set long string
+      vector.set(0, STR3);
+      vector.setValueCount(1);
+      // overwrite with a short string
+      vector.set(0, STR0);
+      vector.setValueCount(1);
+
+      validateViewBuffer(0, vector, STR0, /*NA*/-1, /*NA*/-1);
+    }
+
+    // Overwriting in the middle of the buffer when existing values are all long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(48, 3);
+      // set long string 1
+      vector.set(0, STR3);
+      // set long string 2
+      vector.set(1, STR8);
+      // set long string 3
+      vector.set(2, STR7);
+      vector.setValueCount(3);
+
+      // overwrite index 1 with a short string
+      vector.set(1, STR6);
+      vector.setValueCount(3);
+
+      validateViewBuffer(0, vector, STR3, 0, 0);
+      validateViewBuffer(1, vector, STR6, /*NA*/-1, /*NA*/-1);
+      // since the append-only approach is used,
+      // STR8 will still be in the first data buffer in dataBuffers.
+      validateViewBuffer(2, vector, STR7, 0, STR3.length + STR8.length);
+    }
+
+    // Overwriting in the middle of the buffer with a mix of short and long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(80, 5);
+      // set long string 1
+      vector.set(0, STR3);
+      // set short string 1
+      vector.set(1, STR5);
+      // set long string 2
+      vector.set(2, STR7);
+      // set long string 3
+      vector.set(3, STR8);
+      // set short string 2
+      vector.set(4, STR6);
+      vector.setValueCount(5);
+
+      // overwrite index 2 with a short string
+      vector.set(2, STR0);
+      vector.setValueCount(5);
+
+      validateViewBuffer(0, vector, STR3, 0, 0);
+      validateViewBuffer(1, vector, STR5, /*NA*/-1, /*NA*/-1);
+      validateViewBuffer(2, vector, STR0, /*NA*/-1, /*NA*/-1);
+      // since the append-only approach is used,
+      // STR7 will still be in the first data buffer in dataBuffers.
+      validateViewBuffer(3, vector, STR8, 0, STR3.length + STR7.length);
+      validateViewBuffer(4, vector, STR6, /*NA*/-1, /*NA*/-1);
+    }
+  }
+
+  @Test
+  public void testOverwriteLongFromAShorterLongString() {
+    // Overwriting at the beginning of the buffer.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 1);
+      // set long string
+      vector.set(0, STR7);
+      vector.setValueCount(1);
+      // Overwrite with a shorter long string: the append-only approach is used and the
+      // remaining capacity is not enough to store the new string, so a new buffer will
+      // be allocated.
+      final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
+      final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
+      assertTrue(remainingCapacity < STR3.length);
+      // set shorter long string
+      vector.set(0, STR3);
+      vector.setValueCount(1);
+
+      validateViewBuffer(0, vector, STR3, 1, 0);
+    }
+
+    // Overwriting in the middle of the buffer when existing values are all long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      // extra memory is allocated
+      vector.allocateNew(128, 3);
+      // set long string 1
+      vector.set(0, STR3);
+      // set long string 2
+      vector.set(1, STR8);
+      // set long string 3
+      vector.set(2, STR7);
+      vector.setValueCount(3);
+
+      // Overwrite index 1 with a shorter long string. Under the append-only approach,
+      // the remaining capacity is enough, so it is stored in the same data buffer.
+      final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
+      final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
+      assertTrue(remainingCapacity > STR2.length);
+      vector.set(1, STR2);
+      vector.setValueCount(3);
+
+      validateViewBuffer(0, vector, STR3, 0, 0);
+      // since the append-only approach is used,
+      // STR8 will still be in the first data buffer in dataBuffers.
+      validateViewBuffer(1, vector, STR2, 0, STR3.length + STR8.length + STR7.length);
+      validateViewBuffer(2, vector, STR7, 0, STR3.length + STR8.length);
+    }
+
+    // Overwriting in the middle of the buffer with a mix of short and long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(128, 5);
+      // set long string 1
+      vector.set(0, STR3);
+      // set short string 1
+      vector.set(1, STR5);
+      // set long string 2
+      vector.set(2, STR7);
+      // set long string 3
+      vector.set(3, STR8);
+      // set short string 2
+      vector.set(4, STR6);
+      vector.setValueCount(5);
+
+      // Overwrite index 2 with a shorter long string. Under the append-only approach,
+      // the remaining capacity is enough, so it is stored in the same data buffer.
+      final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
+      final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
+      assertTrue(remainingCapacity > STR2.length);
+      vector.set(2, STR2);
+      vector.setValueCount(5);
+
+      validateViewBuffer(0, vector, STR3, 0, 0);
+      validateViewBuffer(1, vector, STR5, /*NA*/-1, /*NA*/-1);
+      // since the append-only approach is used,
+      // STR7 will still be in the first data buffer in dataBuffers.
+      validateViewBuffer(2, vector, STR2, 0, STR3.length + STR7.length + STR8.length);
+      validateViewBuffer(3, vector, STR8, 0, STR3.length + STR7.length);
+      validateViewBuffer(4, vector, STR6, /*NA*/-1, /*NA*/-1);
+    }
+  }
+
+  @Test
+  public void testOverwriteLongFromALongerLongString() {
+    // Overwriting at the beginning of the buffer.
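+    // Growing a long value past the remaining capacity of the current data buffer
+    // must spill into a newly allocated buffer (bufId 1), again because writes are
+    // append-only.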
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 1);
+      // set long string
+      vector.set(0, STR3);
+      vector.setValueCount(1);
+      // Overwrite with a longer long string: the append-only approach is used and the
+      // remaining capacity is not enough to store the new string, so a new buffer will
+      // be allocated.
+      final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
+      final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
+      assertTrue(remainingCapacity < STR7.length);
+      // set longer long string
+      vector.set(0, STR7);
+      vector.setValueCount(1);
+
+      validateViewBuffer(0, vector, STR7, 1, 0);
+    }
+
+    // Overwriting in the middle of the buffer when existing values are all long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      // extra memory is allocated
+      vector.allocateNew(48, 3);
+      // set long string 1
+      vector.set(0, STR3);
+      // set long string 2
+      vector.set(1, STR8);
+      // set long string 3
+      vector.set(2, STR7);
+      vector.setValueCount(3);
+
+      // Overwrite index 1 with a longer long string. The remaining capacity is not
+      // enough to store it in the same data buffer, so a new buffer is added to
+      // dataBuffers.
+      final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
+      final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
+      String longerString = generateRandomString(35);
+      byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
+      assertTrue(remainingCapacity < longerStringBytes.length);
+
+      vector.set(1, longerStringBytes);
+      vector.setValueCount(3);
+
+      validateViewBuffer(0, vector, STR3, 0, 0);
+      validateViewBuffer(1, vector, longerStringBytes, 1, 0);
+      // since the append-only approach is used,
+      // STR8 will still be in the first data buffer in dataBuffers.
+      validateViewBuffer(2, vector, STR7, 0, STR3.length + STR8.length);
+    }
+
+    // Overwriting in the middle of the buffer with a mix of short and long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(128, 5);
+      // set long string 1
+      vector.set(0, STR3);
+      // set short string 1
+      vector.set(1, STR5);
+      // set long string 2
+      vector.set(2, STR7);
+      // set long string 3
+      vector.set(3, STR2);
+      // set short string 2
+      vector.set(4, STR6);
+      vector.setValueCount(5);
+
+      // Overwrite index 2 with a longer long string; the remaining capacity is enough
+      // to store it in the same data buffer.
+      final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
+      final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
+      String longerString = generateRandomString(24);
+      byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
+      assertTrue(remainingCapacity > longerStringBytes.length);
+
+      vector.set(2, longerStringBytes);
+      vector.setValueCount(5);
+
+      validateViewBuffer(0, vector, STR3, 0, 0);
+      validateViewBuffer(1, vector, STR5, /*NA*/-1, /*NA*/-1);
+      // since the append-only approach is used,
+      // STR7 will still be in the first data buffer in dataBuffers.
+      validateViewBuffer(2, vector, longerStringBytes, 0, STR3.length + STR7.length + STR2.length);
+      validateViewBuffer(3, vector, STR2, 0, STR3.length + STR7.length);
+      validateViewBuffer(4, vector, STR6, /*NA*/-1, /*NA*/-1);
+    }
+  }
+
+  @Test
+  public void testSafeOverwriteShortFromLongString() {
+    // Overwriting at the beginning of the buffer.
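+    // The safe-set tests below start from deliberately small allocations
+    // (allocateNew(16, ...)) so that setSafe has to expand capacity itself.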
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 1);
+      // set short string
+      vector.setSafe(0, STR0);
+      vector.setValueCount(1);
+      assertEquals(0, vector.dataBuffers.size());
+      assertArrayEquals(STR0, vector.get(0));
+
+      // overwrite with a long string
+      vector.setSafe(0, STR3);
+      vector.setValueCount(1);
+      assertEquals(1, vector.dataBuffers.size());
+      assertArrayEquals(STR3, vector.get(0));
+    }
+
+    // Overwriting in the middle of the buffer when existing values are all short strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 3);
+      // set short string 1
+      vector.setSafe(0, STR0);
+      // set short string 2
+      vector.setSafe(1, STR5);
+      // set short string 3
+      vector.setSafe(2, STR6);
+      vector.setValueCount(3);
+
+      // overwrite index 1 with a long string
+      vector.setSafe(1, STR7);
+      vector.setValueCount(3);
+
+      assertArrayEquals(STR0, vector.get(0));
+      assertArrayEquals(STR7, vector.get(1));
+      assertArrayEquals(STR6, vector.get(2));
+    }
+
+    // Overwriting in the middle of the buffer with a mix of short and long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 5);
+      // set short string 1
+      vector.setSafe(0, STR0);
+      // set long string 1
+      vector.setSafe(1, STR3);
+      // set short string 2
+      vector.setSafe(2, STR5);
+      // set short string 3
+      vector.setSafe(3, STR6);
+      // set long string 2
+      vector.setSafe(4, STR7);
+      vector.setValueCount(5);
+
+      // overwrite index 2 with a long string
+      vector.setSafe(2, STR8);
+      vector.setValueCount(5);
+
+      assertArrayEquals(STR0, vector.get(0));
+      assertArrayEquals(STR3, vector.get(1));
+      assertArrayEquals(STR8, vector.get(2));
+      assertArrayEquals(STR6, vector.get(3));
+      assertArrayEquals(STR7, vector.get(4));
+    }
+
+    // Overwriting in the middle of the buffer with a mix of short and long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 5);
+      // set short string 1
+      vector.setSafe(0, STR0);
+      // set long string 1
+      vector.setSafe(1, STR3);
+      // set short string 2
+      vector.setSafe(2, STR5);
+      // set short string 3
+      vector.setSafe(3, STR6);
+      // set long string 2
+      vector.setSafe(4, STR7);
+
+      vector.setValueCount(5);
+
+      // overwrite index 2 with a long string
+      String longString = generateRandomString(128);
+      byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8);
+
+      vector.setSafe(2, longStringBytes);
+      vector.setValueCount(5);
+
+      assertArrayEquals(STR0, vector.get(0));
+      assertArrayEquals(STR3, vector.get(1));
+      assertArrayEquals(longStringBytes, vector.get(2));
+      assertArrayEquals(STR6, vector.get(3));
+      assertArrayEquals(STR7, vector.get(4));
+    }
+  }
+
+  @Test
+  public void testSafeOverwriteLongFromShortString() {
+    // Overwriting at the beginning of the buffer.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 1);
+      // set long string
+      vector.setSafe(0, STR3);
+      vector.setValueCount(1);
+      // overwrite with a short string
+      vector.setSafe(0, STR0);
+      vector.setValueCount(1);
+
+      assertArrayEquals(STR0, vector.get(0));
+    }
+
+    // Overwriting in the middle of the buffer when existing values are all long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 3);
+      // set long string 1
+      vector.setSafe(0, STR3);
+      // set long string 2
+      vector.setSafe(1, STR8);
+      // set long string 3
+      vector.setSafe(2, STR7);
+      vector.setValueCount(3);
+
+      // overwrite index 1 with a short string
+      vector.setSafe(1, STR6);
+      vector.setValueCount(3);
+
+      assertArrayEquals(STR3, vector.get(0));
+      assertArrayEquals(STR6, vector.get(1));
+      assertArrayEquals(STR7, vector.get(2));
+    }
+
+    // Overwriting in the middle of the buffer with a mix of short and long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 5);
+      // set long string 1
+      vector.setSafe(0, STR3);
+      // set short string 1
+      vector.setSafe(1, STR5);
+      // set long string 2
+      vector.setSafe(2, STR7);
+      // set long string 3
+      vector.setSafe(3, STR8);
+      // set short string 2
+      vector.setSafe(4, STR6);
+      vector.setValueCount(5);
+
+      // overwrite index 2 with a short string
+      vector.setSafe(2, STR0);
+      vector.setValueCount(5);
+
+      assertArrayEquals(STR3, vector.get(0));
+      assertArrayEquals(STR5, vector.get(1));
+      assertArrayEquals(STR0, vector.get(2));
+      assertArrayEquals(STR8, vector.get(3));
+      assertArrayEquals(STR6, vector.get(4));
+    }
+  }
+
+  @Test
+  public void testSafeOverwriteLongFromAShorterLongString() {
+    // Overwriting at the beginning of the buffer.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 1);
+      // set long string
+      vector.setSafe(0, STR7);
+      vector.setValueCount(1);
+      // overwrite with a shorter long string
+      vector.setSafe(0, STR3);
+      vector.setValueCount(1);
+
+      assertArrayEquals(STR3, vector.get(0));
+    }
+
+    // Overwriting in the middle of the buffer when existing values are all long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      // deliberately small initial allocation; setSafe grows the buffers as needed
+      vector.allocateNew(16, 3);
+      // set long string 1
+      vector.setSafe(0, STR3);
+      // set long string 2
+      vector.setSafe(1, STR8);
+      // set long string 3
+      vector.setSafe(2, STR7);
+      vector.setValueCount(3);
+
+      // overwrite index 1 with a shorter long string
+      vector.setSafe(1, STR2);
+      vector.setValueCount(3);
+
+      assertArrayEquals(STR3, vector.get(0));
+      assertArrayEquals(STR2, vector.get(1));
+      assertArrayEquals(STR7, vector.get(2));
+    }
+
+    // Overwriting in the middle of the buffer with a mix of short and long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 5);
+      // set long string 1
+      vector.setSafe(0, STR3);
+      // set short string 1
+      vector.setSafe(1, STR5);
+      // set long string 2
+      vector.setSafe(2, STR7);
+      // set long string 3
+      vector.setSafe(3, STR8);
+      // set short string 2
+      vector.setSafe(4, STR6);
+      vector.setValueCount(5);
+
+      // overwrite index 2 with a shorter long string
+      vector.setSafe(2, STR2);
+      vector.setValueCount(5);
+
+      assertArrayEquals(STR3, vector.get(0));
+      assertArrayEquals(STR5, vector.get(1));
+      assertArrayEquals(STR2, vector.get(2));
+      assertArrayEquals(STR8, vector.get(3));
+      assertArrayEquals(STR6, vector.get(4));
+    }
+  }
+
+  @Test
+  public void testSafeOverwriteLongFromALongerLongString() {
+    // Overwriting at the beginning of the buffer.
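+    // Unlike the unsafe variant of this test, no remaining-capacity preconditions are
+    // asserted here: setSafe is expected to grow the data buffers transparently.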
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 1);
+      // set long string
+      vector.setSafe(0, STR3);
+      vector.setValueCount(1);
+      // overwrite with a longer long string
+      vector.setSafe(0, STR7);
+      vector.setValueCount(1);
+
+      assertArrayEquals(STR7, vector.get(0));
+    }
+
+    // Overwriting in the middle of the buffer when existing values are all long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      // deliberately small initial allocation; setSafe grows the buffers as needed
+      vector.allocateNew(16, 3);
+      // set long string 1
+      vector.setSafe(0, STR3);
+      // set long string 2
+      vector.setSafe(1, STR8);
+      // set long string 3
+      vector.setSafe(2, STR7);
+      vector.setValueCount(3);
+
+      String longerString = generateRandomString(35);
+      byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
+
+      vector.setSafe(1, longerStringBytes);
+      vector.setValueCount(3);
+
+      assertArrayEquals(STR3, vector.get(0));
+      assertArrayEquals(longerStringBytes, vector.get(1));
+      assertArrayEquals(STR7, vector.get(2));
+    }
+
+    // Overwriting in the middle of the buffer with a mix of short and long strings.
+    try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) {
+      vector.allocateNew(16, 5);
+      // set long string 1
+      vector.setSafe(0, STR3);
+      // set short string 1
+      vector.setSafe(1, STR5);
+      // set long string 2
+      vector.setSafe(2, STR7);
+      // set long string 3
+      vector.setSafe(3, STR2);
+      // set short string 2
+      vector.setSafe(4, STR6);
+      vector.setValueCount(5);
+
+      String longerString = generateRandomString(24);
+      byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
+
+      vector.setSafe(2, longerStringBytes);
+      vector.setValueCount(5);
+
+      assertArrayEquals(STR3, vector.get(0));
+      assertArrayEquals(STR5, vector.get(1));
+      assertArrayEquals(longerStringBytes, vector.get(2));
+      assertArrayEquals(STR2, vector.get(3));
+      assertArrayEquals(STR6, vector.get(4));
+    }
+  }
+
+  @Test
+  public void testVectorLoadUnload() {
+    try (final ViewVarCharVector vector1 = new ViewVarCharVector("myvector", allocator)) {
+
+      setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6);
+
+      assertEquals(5, vector1.getLastSet());
+      vector1.setValueCount(15);
+      assertEquals(14, vector1.getLastSet());
+
+      /* Check the vector output */
+      assertArrayEquals(STR1, vector1.get(0));
+      assertArrayEquals(STR2, vector1.get(1));
+      assertArrayEquals(STR3, vector1.get(2));
+      assertArrayEquals(STR4, vector1.get(3));
+      assertArrayEquals(STR5, vector1.get(4));
+      assertArrayEquals(STR6, vector1.get(5));
+
+      Field field = vector1.getField();
+      String fieldName = field.getName();
+
+      List<Field> fields = new ArrayList<>();
+      List<FieldVector> fieldVectors = new ArrayList<>();
+
+      fields.add(field);
+      fieldVectors.add(vector1);
+
+      Schema schema = new Schema(fields);
+
+      VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount());
+      VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
+
+      try (
+          ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+          BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE);
+          VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator)
+      ) {
+
+        VectorLoader vectorLoader = new VectorLoader(schemaRoot2);
+        vectorLoader.load(recordBatch);
+
+        ViewVarCharVector vector2 = (ViewVarCharVector) schemaRoot2.getVector(fieldName);
+        /*
+         * lastSet would have internally been set by VectorLoader.load() when it invokes
+         * loadFieldBuffers.
+         */
+        assertEquals(14, vector2.getLastSet());
+        vector2.setValueCount(25);
+        assertEquals(24, vector2.getLastSet());
+
+        /* Check the vector output */
+        assertArrayEquals(STR1, vector2.get(0));
+        assertArrayEquals(STR2, vector2.get(1));
+        assertArrayEquals(STR3, vector2.get(2));
+        assertArrayEquals(STR4, vector2.get(3));
+        assertArrayEquals(STR5, vector2.get(4));
+        assertArrayEquals(STR6, vector2.get(5));
+      }
+    }
+  }
+
+  static Stream<Arguments> vectorCreatorProvider() {
+    return Stream.of(
+        Arguments.of((Function<BufferAllocator, BaseVariableWidthViewVector>)
+            (allocator -> newVector(ViewVarBinaryVector.class, EMPTY_SCHEMA_PATH,
+                Types.MinorType.VIEWVARBINARY, allocator))),
+        Arguments.of((Function<BufferAllocator, BaseVariableWidthViewVector>)
+            (allocator -> newVector(ViewVarCharVector.class, EMPTY_SCHEMA_PATH,
+                Types.MinorType.VIEWVARCHAR, allocator)))
+    );
+  }
+
+  @ParameterizedTest
+  @MethodSource("vectorCreatorProvider")
+  public void testCopyFromWithNulls(Function<BufferAllocator, BaseVariableWidthViewVector> vectorCreator) {
+    try (final BaseVariableWidthViewVector vector = vectorCreator.apply(allocator);
+        final BaseVariableWidthViewVector vector2 = vectorCreator.apply(allocator)) {
+      final int initialCapacity = 1024;
+      vector.setInitialCapacity(initialCapacity);
+      vector.allocateNew();
+      int capacity = vector.getValueCapacity();
+      assertTrue(capacity >= initialCapacity);
+
+      // setting number of values such that we have enough space in the initial allocation
+      // to avoid re-allocation. This is to test copyFrom() without re-allocation.
+      final int numberOfValues = initialCapacity / 2 / ViewVarCharVector.ELEMENT_SIZE;
+
+      final String prefixString = generateRandomString(12);
+
+      for (int i = 0; i < numberOfValues; i++) {
+        if (i % 3 == 0) {
+          // null values
+          vector.setNull(i);
+        } else if (i % 3 == 1) {
+          // short strings
+          byte[] b = Integer.toString(i).getBytes(StandardCharsets.UTF_8);
+          vector.set(i, b, 0, b.length);
+        } else {
+          // long strings
+          byte[] b = (i + prefixString).getBytes(StandardCharsets.UTF_8);
+          vector.set(i, b, 0, b.length);
+        }
+      }
+
+      assertEquals(capacity, vector.getValueCapacity());
+
+      vector.setValueCount(numberOfValues);
+
+      for (int i = 0; i < numberOfValues; i++) {
+        if (i % 3 == 0) {
+          assertNull(vector.getObject(i));
+        } else if (i % 3 == 1) {
+          assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8),
+              vector.get(i),
+              "unexpected value at index: " + i);
+        } else {
+          assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8),
+              vector.get(i),
+              "unexpected value at index: " + i);
+        }
+      }
+
+      vector2.setInitialCapacity(initialCapacity);
+      vector2.allocateNew();
+      int capacity2 = vector2.getValueCapacity();
+      assertEquals(capacity2, capacity);
+
+      for (int i = 0; i < numberOfValues; i++) {
+        vector2.copyFrom(i, i, vector);
+        if (i % 3 == 0) {
+          assertNull(vector2.getObject(i));
+        } else if (i % 3 == 1) {
+          assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8),
+              vector2.get(i),
+              "unexpected value at index: " + i);
+        } else {
+          assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8),
+              vector2.get(i),
+              "unexpected value at index: " + i);
+        }
+      }
+
+      assertEquals(capacity, vector2.getValueCapacity());
+
+      vector2.setValueCount(numberOfValues);
+
+      for (int i = 0; i < numberOfValues; i++) {
+        if (i % 3 == 0) {
+          assertNull(vector2.getObject(i));
+        } else if (i % 3 == 1) {
+          assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8),
+              vector2.get(i),
+              "unexpected value at index: " + i);
+        } else {
+          assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8),
+              vector2.get(i),
"unexpected value at index: " + i); + } + } + } + } + + @ParameterizedTest + @MethodSource("vectorCreatorProvider") + public void testCopyFromSafeWithNulls(Function vectorCreator) { + try (final BaseVariableWidthViewVector vector = vectorCreator.apply(allocator); + final BaseVariableWidthViewVector vector2 = vectorCreator.apply(allocator)) { + + final int initialCapacity = 4096; + vector.setInitialCapacity(initialCapacity); + vector.allocateNew(); + int capacity = vector.getValueCapacity(); + assertTrue(capacity >= initialCapacity); + + final int numberOfValues = initialCapacity / ViewVarCharVector.ELEMENT_SIZE; + + final String prefixString = generateRandomString(12); + + for (int i = 0; i < numberOfValues; i++) { + if (i % 3 == 0) { + // null values + vector.setNull(i); + } else if (i % 3 == 1) { + // short strings + byte[] b = Integer.toString(i).getBytes(StandardCharsets.UTF_8); + vector.setSafe(i, b, 0, b.length); + } else { + // long strings + byte[] b = (i + prefixString).getBytes(StandardCharsets.UTF_8); + vector.setSafe(i, b, 0, b.length); + } + } + + /* NO reAlloc() should have happened in setSafe() */ + assertEquals(capacity, vector.getValueCapacity()); + + vector.setValueCount(numberOfValues); + + for (int i = 0; i < numberOfValues; i++) { + if (i % 3 == 0) { + assertNull(vector.getObject(i)); + } else if (i % 3 == 1) { + assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } else { + assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } + } + + vector2.setInitialCapacity(initialCapacity); + vector2.allocateNew(); + int capacity2 = vector2.getValueCapacity(); + assertEquals(capacity2, capacity); + + for (int i = 0; i < numberOfValues; i++) { + vector2.copyFromSafe(i, i, vector); + if (i % 3 == 0) { + assertNull(vector2.getObject(i)); + } else if (i % 3 == 1) { + assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } else { + assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } + } + + /* NO reAlloc() should have happened in setSafe() */ + assertEquals(capacity, vector2.getValueCapacity()); + + vector2.setValueCount(numberOfValues); + + for (int i = 0; i < numberOfValues; i++) { + if (i % 3 == 0) { + assertNull(vector2.getObject(i)); + } else if (i % 3 == 1) { + assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } else { + assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } + } + } + } + + public byte[] generateRandomBinaryData(int size) { + byte[] binaryData = new byte[size]; + random.nextBytes(binaryData); + return binaryData; + } + + private byte[][] generateBinaryDataArray(int size, int length) { + byte[][] binaryDataArray = new byte[size][]; + for (int i = 0; i < size; i++) { + binaryDataArray[i] = generateRandomBinaryData(length); + } + return binaryDataArray; + } + + private void testSplitAndTransferOnSlicedBufferHelper(BaseVariableWidthViewVector targetVector, + BaseVariableWidthViewVector sourceVector, int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(1024 * 10, 1024); + + for (int i = 0; i < data.length; i++) { + sourceVector.set(i, data[i]); + } + sourceVector.setValueCount(data.length); + + 
final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + // we allocate view and data buffers for the target vector + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + + // The validity buffer is sliced from the same buffer. See BaseVariableWidthViewVector#allocateBytes. + // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. + assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + } + + /** + * ARROW-7831: + * this checks that a slice taken off a buffer is still readable + * after that buffer's allocator is closed. + * With short strings. + */ + @Test + public void testSplitAndTransferWithShortStringOnSlicedBuffer() { + final byte [][] data = new byte[][]{STR4, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { + try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, data); + } + validateVector.accept(targetVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 10); + + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, binaryData); + } + validateVector.accept(targetVector, binaryData); + } + } + + /** + * ARROW-7831: + * this checks that a slice taken off a buffer is still readable + * after that buffer's allocator is closed. + * With a long string included.
+ */ + @Test + public void testSplitAndTransferWithLongStringsOnSlicedBuffer() { + final byte [][] data = new byte[][]{STR2, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { + try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, data); + } + validateVector.accept(targetVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 18); + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, binaryData); + } + validateVector.accept(targetVector, binaryData); + } + } + + private void testSplitAndTransferOnSlicedVectorHelper(BaseVariableWidthViewVector sourceVector, + BaseVariableWidthViewVector targetVector, int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(1024 * 10, 1024); + + for (int i = 0; i < data.length; i++) { + sourceVector.set(i, data[i]); + } + sourceVector.setValueCount(data.length); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + // we allocate view and data buffers for the target vector + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + // The validity buffer is sliced from the same buffer. See BaseVariableWidthViewVector#allocateBytes. + // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. + assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = startIndex; i < startIndex + length; i++) { + assertArrayEquals(data[i], targetVector.get(i - startIndex)); + } + }
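+ + // Note: each view element occupies 16 bytes (ViewVarCharVector.ELEMENT_SIZE). Values of + // 12 bytes or fewer are stored inline in the view buffer; longer values keep a 4-byte prefix + // plus a buffer-index/offset pair pointing into a separate data buffer. The "short" and + // "long" variants of these tests exercise the inline and out-of-line paths respectively.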
+ + /** + * ARROW-7831: + * this checks that a sliced vector is still readable + * after the slice's allocator is closed. + * With short strings. + */ + @Test + public void testSplitAndTransferWithShortStringsOnSlicedVector() { + byte [][] data = new byte[][]{STR4, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (sourceVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], sourceVector.get(i))); + }; + + try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, data); + } + validateVector.accept(sourceVector, data); + } + + byte [][] binaryData = generateBinaryDataArray(3, 10); + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, binaryData); + } + validateVector.accept(sourceVector, binaryData); + } + } + + /** + * ARROW-7831: + * this checks that a sliced vector is still readable + * after the slice's allocator is closed. + * With a long string included. + */ + @Test + public void testSplitAndTransferWithLongStringsOnSlicedVector() { + final byte [][] data = new byte[][]{STR2, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (sourceVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], sourceVector.get(i))); + }; + + try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, data); + } + validateVector.accept(sourceVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 20); + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, binaryData); + } + validateVector.accept(sourceVector, binaryData); + } + } + + private void testSplitAndTransferOnValiditySplitHelper( + BaseVariableWidthViewVector targetVector, BaseVariableWidthViewVector sourceVector, + int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(1024 * 10, 1024); + + sourceVector.set(0, new byte[0]); + sourceVector.setNull(1); + for (int i = 0; i < data.length; i++) { + if (data[i] == null) { + sourceVector.setNull(i); + } else { + sourceVector.set(i, data[i]); + } + } + sourceVector.setValueCount(data.length); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + // compute the rounded size of a 2-element validity buffer + final long validitySize = + DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize( + BaseValueVector.getValidityBufferSizeFromCount(2)); + // beyond the new validity buffer, view and data buffers are also allocated for the target vector + assertTrue(allocatedMem + validitySize < allocator.getAllocatedMemory());
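+ // DefaultRoundingPolicy rounds each allocation request up to the allocator's chunk + // granularity, which is why the comparison above uses the rounded validity size rather + // than the raw bitmap size.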
+ // The target vector gets its own validity buffer here (the transferred range does not + // start on a byte boundary), so the refcnt of the source validity buffer does not change. + assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = startIndex; i < startIndex + length; i++) { + assertArrayEquals(data[i], targetVector.get(i - startIndex)); + } + + for (int i = 0; i < data.length; i++) { + if (data[i] == null) { + assertTrue(sourceVector.isNull(i)); + } else { + assertArrayEquals(data[i], sourceVector.get(i)); + } + } + } + + /** + * ARROW-7831: + * this checks validity-buffer splitting in the case where all the values + * up to the start of the slice are null/empty. + * With short strings. + */ + @Test + public void testSplitAndTransferWithShortStringsOnValiditySplit() { + final byte [][] data = new byte[][]{new byte[0], null, STR4, STR5, STR6}; + final int startIndex = 2; + final int length = 2; + + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator); + final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, data); + } + + final byte [][] binaryData = generateBinaryDataArray(5, 10); + binaryData[0] = new byte[0]; + binaryData[1] = null; + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, binaryData); + } + } + + /** + * ARROW-7831: + * this checks validity-buffer splitting in the case where all the values + * up to the start of the slice are null/empty. + * With long strings.
+ */ + @Test + public void testSplitAndTransferWithLongStringsOnValiditySplit() { + final byte [][] data = new byte[][]{new byte[0], null, STR1, STR2, STR3}; + final int startIndex = 2; + final int length = 2; + + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator); + final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, data); + } + + final byte [][] binaryData = generateBinaryDataArray(5, 18); + binaryData[0] = new byte[0]; + binaryData[1] = null; + + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, binaryData); + } + } + + private void testSplitAndTransferOnAllocatorToAllocator(BaseVariableWidthViewVector targetVector, + BaseVariableWidthViewVector sourceVector, int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(50, data.length); + + for (int i = 0; i < data.length; i++) { + sourceVector.set(i, data[i]); + } + sourceVector.setValueCount(data.length); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + + if (sourceVector.getDataBuffers().isEmpty()) { + // no extra allocation as strings are all inline + assertEquals(allocatedMem, allocator.getAllocatedMemory()); + } else { + // extra allocation as some strings are not inline + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + } + + // the refcnts of the source vector's buffers should be unchanged by this transfer. + assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = 0; i < data.length; i++) { + assertArrayEquals(data[i], sourceVector.get(i)); + } + }
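+ + // getDataBuffers() is empty when every value fit inline in its 16-byte view; the helper + // above uses this to decide whether the transfer should have allocated extra data buffers.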
+ + /** + * ARROW-7831: + * ensures that data is transferred from one allocator to another for the + * special case of a slice starting at index 0. + * With short strings. + */ + @Test + public void testSplitAndTransferWithShortStringsOnAllocatorToAllocator() { + final int maxAllocation = 512; + final byte [][] data = new byte[][]{STR4, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + + try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", 256, maxAllocation); + final ViewVarCharVector targetVector = newViewVarCharVector("split-target", targetAllocator)) { + try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", 256, maxAllocation); + final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, data); + } + validateVector.accept(targetVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 10); + try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", 256, maxAllocation); + final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", targetAllocator)) { + try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", 256, maxAllocation); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, binaryData); + } + validateVector.accept(targetVector, binaryData); + } + } + + /** + * ARROW-7831: + * ensures that data is transferred from one allocator to another for the + * special case of a slice starting at index 0. + * With long strings. + */ + @Test + public void testSplitAndTransferWithLongStringsOnAllocatorToAllocator() { + final int initialReservation = 1024; + // Here the target vector receives long strings in the transfer, + // so a data buffer has to be allocated for it.
+ // The default data buffer allocation takes + // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE + final byte [][] data = new byte[][]{STR1, STR2, STR3}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + + final int maxAllocation = initialReservation + + BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE; + try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", + initialReservation, maxAllocation); + final ViewVarCharVector targetVector = newViewVarCharVector("split-target", targetAllocator)) { + try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", + initialReservation, maxAllocation); + final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, data); + } + validateVector.accept(targetVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 18); + + try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", + initialReservation, maxAllocation); + final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", targetAllocator)) { + try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", + initialReservation, maxAllocation); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, binaryData); + } + validateVector.accept(targetVector, binaryData); + } + } + + private void testReallocAfterVectorTransferHelper(BaseVariableWidthViewVector vector, + byte[] str1, byte[] str2) { + /* 4096 values with 32 bytes per record */ + final int bytesPerRecord = 32; + vector.allocateNew(4096 * bytesPerRecord, 4096); + int valueCapacity = vector.getValueCapacity(); + assertTrue(valueCapacity >= 4096); + + /* populate the vector */ + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + vector.set(i, str1); + } else { + vector.set(i, str2); + } + } + + /* Check the vector output */ + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, vector.get(i)); + } else { + assertArrayEquals(str2, vector.get(i)); + } + } + + /* trigger first realloc */ + vector.setSafe(valueCapacity, str2, 0, str2.length); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { + vector.reallocViewBuffer(); + vector.reallocViewDataBuffer(); + } + + /* populate the remaining vector */ + for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { + if ((i & 1) == 1) { + vector.set(i, str1); + } else { + vector.set(i, str2); + } + } + + /* Check the vector output */ + valueCapacity = vector.getValueCapacity(); + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, vector.get(i)); + } else { + assertArrayEquals(str2, vector.get(i)); + } + } + + /* trigger second realloc */ + vector.setSafe(valueCapacity + bytesPerRecord, str2, 0, str2.length); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
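+ // keep doubling the view and data buffers until the byte capacity can hold + // bytesPerRecord bytes for every addressable value slot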
+ while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { + vector.reallocViewBuffer(); + vector.reallocViewDataBuffer(); + } + + /* populate the remaining vector */ + for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { + if ((i & 1) == 1) { + vector.set(i, str1); + } else { + vector.set(i, str2); + } + } + + /* Check the vector output */ + valueCapacity = vector.getValueCapacity(); + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, vector.get(i)); + } else { + assertArrayEquals(str2, vector.get(i)); + } + } + + /* We are now potentially working with a vector buffer 4x the size + * we initially started with. + * Now let's transfer the vector. + */ + + TransferPair transferPair = vector.getTransferPair(allocator); + transferPair.transfer(); + BaseVariableWidthViewVector toVector = (BaseVariableWidthViewVector) transferPair.getTo(); + valueCapacity = toVector.getValueCapacity(); + + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, toVector.get(i)); + } else { + assertArrayEquals(str2, toVector.get(i)); + } + } + toVector.close(); + } + + @Test + public void testReallocAfterVectorTransfer() { + try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + testReallocAfterVectorTransferHelper(vector, STR1, STR2); + } + + try (final ViewVarBinaryVector vector = new ViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testReallocAfterVectorTransferHelper(vector, generateRandomBinaryData(12), + generateRandomBinaryData(13)); + } + } + + private void testSplitAndTransferWithMultipleDataBuffersHelper(BaseVariableWidthViewVector sourceVector, + BaseVariableWidthViewVector targetVector, int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(48, 4); + + for (int i = 0; i < data.length; i++) { + sourceVector.set(i, data[i]); + } + sourceVector.setValueCount(data.length); + + // we should have multiple data buffers + assertTrue(sourceVector.getDataBuffers().size() > 1); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + // splitting and transferring still allocates new view and data buffers for the target vector + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + + // the refcnts of the source vector's buffers should be unchanged by this transfer. + assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = 0; i < data.length; i++) { + assertArrayEquals(data[i], sourceVector.get(i)); + } + }
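+ + // Note: allocateNew(48, 4) keeps the initial data buffer deliberately small, so writing + // several long values forces the source to chain more than one variadic data buffer.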
+ /** + * ARROW-7831: + * ensures that split-and-transfer works when the source's long values + * span multiple data buffers. + * Check multi-data buffer source copying + */ + @Test + public void testSplitAndTransferWithMultipleDataBuffers() { + final String str4 = generateRandomString(35); + final byte[][] data = new byte[][]{STR1, STR2, STR3, str4.getBytes(StandardCharsets.UTF_8)}; + final int startIndex = 1; + final int length = 3; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + + try (final ViewVarCharVector targetVector = new ViewVarCharVector("target", allocator)) { + try (final ViewVarCharVector sourceVector = new ViewVarCharVector("source", allocator)) { + testSplitAndTransferWithMultipleDataBuffersHelper(sourceVector, targetVector, + startIndex, length, data); + } + validateVector.accept(targetVector, data); + } + + try (final ViewVarBinaryVector targetVector = new ViewVarBinaryVector("target", allocator)) { + try (final ViewVarBinaryVector sourceVector = new ViewVarBinaryVector("source", allocator)) { + testSplitAndTransferWithMultipleDataBuffersHelper(sourceVector, targetVector, + startIndex, length, data); + } + validateVector.accept(targetVector, data); + } + } + + @Test + public void testVectorLoadUnloadOnMixedTypes() { + + try (final IntVector vector1 = new IntVector("myvector", allocator); + final ViewVarCharVector vector2 = new ViewVarCharVector("myviewvector", allocator)) { + + final int valueCount = 15; + + setVector(vector1, 1, 2, 3, 4, 5, 6); + vector1.setValueCount(valueCount); + + setVector(vector2, STR1, STR2, STR3, STR4, STR5, STR6); + vector2.setValueCount(valueCount); + + /* Check the vector output */ + assertEquals(1, vector1.get(0)); + assertEquals(2, vector1.get(1)); + assertEquals(3, vector1.get(2)); + assertEquals(4, vector1.get(3)); + assertEquals(5, vector1.get(4)); + assertEquals(6, vector1.get(5)); + + Field field1 = vector1.getField(); + String fieldName1 = field1.getName(); + + Field field2 = vector2.getField(); + String fieldName2 = field2.getName(); + + List fields = new ArrayList<>(2); + List fieldVectors = new ArrayList<>(2); + + fields.add(field1); + fields.add(field2); + fieldVectors.add(vector1); + fieldVectors.add(vector2); + + Schema schema = new Schema(fields); + + VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, valueCount); + VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); + VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + + // validate that the record batch reports the variadic buffer counts of the view vector + assertFalse(recordBatch.getVariadicBufferCounts().isEmpty()); + assertEquals(1, recordBatch.getVariadicBufferCounts().size()); + + VectorLoader vectorLoader = new VectorLoader(schemaRoot2); + vectorLoader.load(recordBatch); + + IntVector vector3 = (IntVector) schemaRoot2.getVector(fieldName1); + vector3.setValueCount(25); + + /* Check the vector output */ + assertEquals(1, vector3.get(0)); + assertEquals(2, vector3.get(1)); + assertEquals(3, vector3.get(2)); + assertEquals(4, vector3.get(3)); + assertEquals(5, vector3.get(4)); + assertEquals(6, vector3.get(5)); + + ViewVarCharVector vector4 = (ViewVarCharVector) schemaRoot2.getVector(fieldName2); + vector4.setValueCount(25); + + /* Check the vector output */ + assertArrayEquals(STR1, vector4.get(0)); +
assertArrayEquals(STR2, vector4.get(1)); + assertArrayEquals(STR3, vector4.get(2)); + assertArrayEquals(STR4, vector4.get(3)); + assertArrayEquals(STR5, vector4.get(4)); + assertArrayEquals(STR6, vector4.get(5)); + } + } + } + + private String generateRandomString(int length) { + Random random = new Random(); + StringBuilder sb = new StringBuilder(length); + for (int i = 0; i < length; i++) { + sb.append(random.nextInt(10)); // 0-9 + } + return sb.toString(); + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java index b96f6ab6afedd..02a85faa20cd6 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Arrays; import java.util.Collections; @@ -39,23 +39,23 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorAlloc { private BufferAllocator rootAllocator; private BufferAllocator policyAllocator; - @Before + @BeforeEach public void init() { rootAllocator = new RootAllocator(Long.MAX_VALUE); policyAllocator = new RootAllocator(AllocationListener.NOOP, Integer.MAX_VALUE, new CustomPolicy()); } - @After + @AfterEach public void terminate() throws Exception { rootAllocator.close(); policyAllocator.close(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java index 9043bd4f8f2d4..21cbefae45161 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java @@ -17,7 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.nio.charset.StandardCharsets; @@ -37,22 +40,21 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DataSizeRoundingUtil; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorReAlloc { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -68,7 +70,7 @@ public void testFixedType() { try { vector.set(initialCapacity, 0); - Assert.fail("Expected out of bounds exception"); + fail("Expected out of bounds exception"); } catch (Exception e) { // ok } @@ -92,7 +94,7 @@ public void 
testNullableType() { try { vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); - Assert.fail("Expected out of bounds exception"); + fail("Expected out of bounds exception"); } catch (Exception e) { // ok } @@ -101,7 +103,7 @@ public void testNullableType() { assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); - assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8)); + assertEquals(new String(vector.get(initialCapacity), StandardCharsets.UTF_8), "foo"); } } @@ -117,7 +119,7 @@ public void testListType() { try { vector.getInnerValueCountAt(2014); - Assert.fail("Expected out of bounds exception"); + fail("Expected out of bounds exception"); } catch (Exception e) { // ok } @@ -140,7 +142,7 @@ public void testStructType() { try { vector.getObject(513); - Assert.fail("Expected out of bounds exception"); + fail("Expected out of bounds exception"); } catch (Exception e) { // ok } @@ -161,7 +163,7 @@ public void testVariableWidthTypeSetNullValues() { for (int i = 0; i < numNullValues1; i++) { v1.setNull(i); } - Assert.assertTrue(v1.getBufferSizeFor(numNullValues1) > 0); + assertTrue(v1.getBufferSizeFor(numNullValues1) > 0); } try (final BaseLargeVariableWidthVector v2 = new LargeVarCharVector("var2", allocator)) { @@ -171,7 +173,7 @@ public void testVariableWidthTypeSetNullValues() { for (int i = 0; i < numNullValues2; i++) { v2.setNull(i); } - Assert.assertTrue(v2.getBufferSizeFor(numNullValues2) > 0); + assertTrue(v2.getBufferSizeFor(numNullValues2) > 0); } } @@ -194,7 +196,7 @@ public void testFixedAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -218,8 +220,8 @@ public void testVariableAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); - Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); + assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); } } @@ -243,8 +245,8 @@ public void testLargeVariableAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); - Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); + assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); } } @@ -256,8 +258,8 @@ public void testVarCharAllocateNew() throws Exception { vector.allocateNew(count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. - Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); } } @@ -269,8 +271,8 @@ public void testLargeVarCharAllocateNew() throws Exception { vector.allocateNew(count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. 
- Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); } } @@ -282,8 +284,8 @@ public void testVarCharAllocateNewUsingHelper() throws Exception { AllocationHelper.allocateNew(vector, count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. - Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); } } @@ -295,8 +297,8 @@ public void testLargeVarCharAllocateNewUsingHelper() throws Exception { AllocationHelper.allocateNew(vector, count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. - Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); } } @@ -314,7 +316,7 @@ public void testFixedRepeatedClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -333,7 +335,7 @@ public void testVariableRepeatedClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -359,7 +361,7 @@ public void testRepeatedValueVectorClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -386,7 +388,7 @@ public void testStructVectorClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -415,7 +417,7 @@ public void testFixedSizeListVectorClearAndSet() { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -440,7 +442,7 @@ public void testUnionVectorClearAndSet() { } // should be deterministic, and not cause a run-away increase in capacity. 
- Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -468,7 +470,7 @@ public void testDenseUnionVectorClearAndSet() { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java index 71009a3337510..2a6f86426ae8a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; @@ -34,20 +34,20 @@ import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorReset { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -93,6 +93,18 @@ public void testVariableTypeReset() { } } + @Test + public void testVariableViewTypeReset() { + try (final ViewVarCharVector vector = new ViewVarCharVector("ViewVarChar", allocator)) { + vector.allocateNewSafe(); + vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); + vector.setLastSet(0); + vector.setValueCount(1); + resetVectorAndVerify(vector, vector.getBuffers(false)); + assertEquals(-1, vector.getLastSet()); + } + } + @Test public void testLargeVariableTypeReset() { try (final LargeVarCharVector vector = new LargeVarCharVector("LargeVarChar", allocator)) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java index 207962eb45b85..76500052fa632 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java @@ -17,10 +17,11 @@ package org.apache.arrow.vector; -import static junit.framework.TestCase.assertTrue; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Arrays; @@ -35,20 +36,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import 
org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorSchemaRoot { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() { allocator.close(); } @@ -226,20 +227,22 @@ public void testSlice() { } } - @Test(expected = IllegalArgumentException.class) + @Test public void testSliceWithInvalidParam() { - try (final IntVector intVector = new IntVector("intVector", allocator); - final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) { - intVector.setValueCount(10); - float4Vector.setValueCount(10); - for (int i = 0; i < 10; i++) { - intVector.setSafe(i, i); - float4Vector.setSafe(i, i + 0.1f); - } - final VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector, float4Vector)); + assertThrows(IllegalArgumentException.class, () -> { + try (final IntVector intVector = new IntVector("intVector", allocator); + final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) { + intVector.setValueCount(10); + float4Vector.setValueCount(10); + for (int i = 0; i < 10; i++) { + intVector.setSafe(i, i); + float4Vector.setSafe(i, i + 0.1f); + } + final VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector, float4Vector)); - original.slice(0, 20); - } + original.slice(0, 20); + } + }); } @Test diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java index eac72f4b2c893..82ae5c038cbc2 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java @@ -18,9 +18,9 @@ package org.apache.arrow.vector; import static java.util.Arrays.asList; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.util.ArrayList; @@ -44,21 +44,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorUnloadLoad { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -116,9 +115,9 @@ public void testUnloadLoad() throws IOException { FieldReader bigIntReader = newRoot.getVector("bigInt").getReader(); for (int i = 0; i < count; i++) { intReader.setPosition(i); - Assert.assertEquals(i, intReader.readInteger().intValue()); + assertEquals(i, intReader.readInteger().intValue()); bigIntReader.setPosition(i); - Assert.assertEquals(i, bigIntReader.readLong().longValue()); + assertEquals(i, bigIntReader.readLong().longValue()); } } } @@ -188,7 +187,7 @@ public void testUnloadLoadAddPadding() throws IOException { for (int j = 0; j < i % 4 + 1; j++) { expected.add(i); } - Assert.assertEquals(expected, 
reader.readObject()); + assertEquals(expected, reader.readObject()); } } @@ -256,9 +255,9 @@ public void testLoadValidityBuffer() throws IOException { IntVector intDefinedVector = (IntVector) newRoot.getVector("intDefined"); IntVector intNullVector = (IntVector) newRoot.getVector("intNull"); for (int i = 0; i < count; i++) { - assertFalse("#" + i, intDefinedVector.isNull(i)); - assertEquals("#" + i, i, intDefinedVector.get(i)); - assertTrue("#" + i, intNullVector.isNull(i)); + assertFalse(intDefinedVector.isNull(i), "#" + i); + assertEquals(i, intDefinedVector.get(i), "#" + i); + assertTrue(intNullVector.isNull(i), "#" + i); } intDefinedVector.setSafe(count + 10, 1234); assertTrue(intDefinedVector.isNull(count + 1)); @@ -319,13 +318,13 @@ public void testUnloadLoadDuplicates() throws IOException { vectorLoader.load(recordBatch); List targets = newRoot.getFieldVectors(); - Assert.assertEquals(sources.size(), targets.size()); + assertEquals(sources.size(), targets.size()); for (int k = 0; k < sources.size(); k++) { IntVector src = (IntVector) sources.get(k); IntVector tgt = (IntVector) targets.get(k); - Assert.assertEquals(src.getValueCount(), tgt.getValueCount()); + assertEquals(src.getValueCount(), tgt.getValueCount()); for (int i = 0; i < count; i++) { - Assert.assertEquals(src.get(i), tgt.get(i)); + assertEquals(src.get(i), tgt.get(i)); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java index ab8c6c634891e..c3e7ef8bf8b08 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java @@ -18,8 +18,8 @@ package org.apache.arrow.vector.compare; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.Charset; import java.util.Arrays; @@ -33,6 +33,7 @@ import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.LargeVarCharVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.ViewVarCharVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers; import org.apache.arrow.vector.complex.DenseUnionVector; @@ -53,16 +54,16 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; public class TestRangeEqualsVisitor { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } @@ -71,8 +72,11 @@ public void init() { private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset); private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset); + private static final byte[] STR4 = "12345678901234A".getBytes(utf8Charset); + private static final byte[] STR5 = 
"A2345678901234ABC".getBytes(utf8Charset); + private static final byte[] STR6 = "AB45678901234ABCD".getBytes(utf8Charset); - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -132,6 +136,55 @@ public void testBaseVariableVectorRangeEquals() { } } + @Test + public void testBaseVariableViewVectorRangeEquals() { + try (final ViewVarCharVector vector1 = new ViewVarCharVector("varchar", allocator); + final ViewVarCharVector vector2 = new ViewVarCharVector("varchar", allocator)) { + + setVector(vector1, STR1, STR2, STR4, STR3, STR2, STR5, STR1, STR6, STR1, STR2, STR4); + setVector(vector2, STR1, STR2, STR4, STR3, STR2, STR5, STR1, STR6, STR1, STR2, STR4); + + RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); + // inclusion of long string in the middle + assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); + assertFalse(visitor.rangeEquals(new Range(0, 1, 4))); + // inclusion of long string at the start + assertTrue(visitor.rangeEquals(new Range(2, 2, 4))); + assertFalse(visitor.rangeEquals(new Range(2, 5, 4))); + // inclusion of long string at the end + assertTrue(visitor.rangeEquals(new Range(4, 4, 4))); + // unequal range + assertTrue(visitor.rangeEquals(new Range(8, 0, 3))); + assertFalse(visitor.rangeEquals(new Range(4, 5, 3))); + + // checking the same ranges when nulls are set + + vector1.setNull(1); + vector2.setNull(1); + + vector1.setNull(3); + vector2.setNull(3); + + vector1.setNull(5); + vector2.setNull(5); + + vector1.setNull(9); + vector2.setNull(9); + + // inclusion of long string in the middle + assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); + assertFalse(visitor.rangeEquals(new Range(0, 1, 4))); + // inclusion of long string at the start + assertTrue(visitor.rangeEquals(new Range(2, 2, 4))); + assertFalse(visitor.rangeEquals(new Range(2, 5, 4))); + // inclusion of long string at the end + assertTrue(visitor.rangeEquals(new Range(4, 4, 4))); + // unequal range + assertTrue(visitor.rangeEquals(new Range(8, 0, 3))); + assertFalse(visitor.rangeEquals(new Range(4, 5, 3))); + } + } + @Test public void testListVectorWithDifferentChild() { try (final ListVector vector1 = ListVector.empty("list", allocator); @@ -476,7 +529,7 @@ public void testDenseUnionVectorEquals() { } } - @Ignore + @Disabled @Test public void testEqualsWithOutTypeCheck() { try (final IntVector intVector = new IntVector("int", allocator); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java index 62fa0336ea925..6ff81faba73e8 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java @@ -17,9 +17,10 @@ package org.apache.arrow.vector.compare; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; @@ -30,6 +31,8 @@ import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.ViewVarBinaryVector; +import org.apache.arrow.vector.ViewVarCharVector; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.ListVector; import 
org.apache.arrow.vector.complex.StructVector; @@ -38,20 +41,20 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestTypeEqualsVisitor { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -176,4 +179,42 @@ public void testDenseUnionTypeEquals() { assertFalse(typeVisitor.equals(vector1)); } } + + @Test + public void testStringViewTypeEquals() { + try (final ViewVarCharVector varchar1 = new ViewVarCharVector("varchar1", allocator); + final ViewVarCharVector varchar2 = new ViewVarCharVector("varchar2", allocator); + final ViewVarBinaryVector binary = new ViewVarBinaryVector("binary", allocator)) { + final int valueCount = 2; + final byte[] str0 = "apache".getBytes(StandardCharsets.UTF_8); + final byte[] str1 = "arrow".getBytes(StandardCharsets.UTF_8); + + // add elements for varchar1 + varchar1.allocateNew(48, valueCount); + varchar1.set(0, str0); + varchar1.set(1, str1); + varchar1.setValueCount(valueCount); + + // add elements for varchar2 in a different order + varchar2.allocateNew(48, valueCount); + varchar2.set(0, str1); + varchar2.set(1, str0); + varchar2.setValueCount(valueCount); + + // add elements for binary + binary.allocateNew(48, valueCount); + binary.set(0, str0); + binary.set(1, str1); + binary.setValueCount(valueCount); + + // compare while ignoring the field name + TypeEqualsVisitor visitor = new TypeEqualsVisitor(varchar1, /* check name */ false, /* check meta data */ true); + assertTrue(visitor.equals(varchar2)); + assertFalse(visitor.equals(binary)); + + // if we check names, the types should be different + visitor = new TypeEqualsVisitor(varchar1, /* check name */ true, /* check meta data */ true); + assertFalse(visitor.equals(varchar2)); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java index 82ef7a479d05c..0e24fd0af6806 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java @@ -17,7 +17,8 @@ package org.apache.arrow.vector.complex; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java index 29f25170332a2..67bdb9945fc94 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.complex.impl; -import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows; +import static

 import java.math.BigDecimal;
@@ -39,9 +39,9 @@
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.DecimalUtility;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;

 public class TestComplexCopier {
@@ -49,12 +49,12 @@ public class TestComplexCopier {

   private static final int COUNT = 100;

-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Long.MAX_VALUE);
   }

-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -90,7 +90,6 @@ public void testCopyFixedSizeListVector() {

       // validate equals
       assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
-
     }
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java
index b7fc681c16118..3a54d539c290a 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java
@@ -17,9 +17,9 @@
 package org.apache.arrow.vector.complex.impl;

-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;

 import java.nio.ByteBuffer;
@@ -50,21 +50,21 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.Text;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;

 public class TestPromotableWriter {
   private static final String EMPTY_SCHEMA_PATH = "";

   private BufferAllocator allocator;

-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }

-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -128,33 +128,33 @@ public void testPromoteToUnion() throws Exception {

     final UnionVector uv = v.getChild("A", UnionVector.class);

-    assertFalse("0 shouldn't be null", uv.isNull(0));
+    assertFalse(uv.isNull(0), "0 shouldn't be null");
     assertEquals(false, uv.getObject(0));

-    assertFalse("1 shouldn't be null", uv.isNull(1));
+    assertFalse(uv.isNull(1), "1 shouldn't be null");
     assertEquals(true, uv.getObject(1));

-    assertFalse("2 shouldn't be null", uv.isNull(2));
+    assertFalse(uv.isNull(2), "2 shouldn't be null");
     assertEquals(10, uv.getObject(2));

-    assertNull("3 should be null", uv.getObject(3));
+    assertNull(uv.getObject(3), "3 should be null");

-    assertFalse("4 shouldn't be null", uv.isNull(4));
+    assertFalse(uv.isNull(4), "4 shouldn't be null");
     assertEquals(100, uv.getObject(4));

-    assertFalse("5 shouldn't be null", uv.isNull(5));
+    assertFalse(uv.isNull(5), "5 shouldn't be null");
     assertEquals(123123L, uv.getObject(5));

-    assertFalse("6 shouldn't be null", uv.isNull(6));
+    assertFalse(uv.isNull(6), "6 shouldn't be null");
     NullableTimeStampMilliTZHolder readBackHolder = new NullableTimeStampMilliTZHolder();
     uv.getTimeStampMilliTZVector().get(6, readBackHolder);
     assertEquals(12345L, readBackHolder.value);
     assertEquals("UTC", readBackHolder.timezone);

-    assertFalse("7 shouldn't be null", uv.isNull(7));
+    assertFalse(uv.isNull(7), "7 shouldn't be null");
     assertEquals(444413L, ((java.time.Duration) uv.getObject(7)).getSeconds());

-    assertFalse("8 shouldn't be null", uv.isNull(8));
+    assertFalse(uv.isNull(8), "8 shouldn't be null");
     assertEquals(18978, ByteBuffer.wrap(uv.getFixedSizeBinaryVector().get(8)).order(ByteOrder.nativeOrder()).getInt());
@@ -172,10 +172,10 @@ public void testPromoteToUnion() throws Exception {
     Field childField1 = container.getField().getChildren().get(0).getChildren().get(0);
     Field childField2 = container.getField().getChildren().get(0).getChildren().get(1);
-    assertEquals("Child field should be union type: " +
-        childField1.getName(), ArrowTypeID.Union, childField1.getType().getTypeID());
-    assertEquals("Child field should be decimal type: " +
-        childField2.getName(), ArrowTypeID.Decimal, childField2.getType().getTypeID());
+    assertEquals(ArrowTypeID.Union, childField1.getType().getTypeID(),
+        "Child field should be union type: " + childField1.getName());
+    assertEquals(ArrowTypeID.Decimal, childField2.getType().getTypeID(),
+        "Child field should be decimal type: " + childField2.getName());

     buf.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
index 19f0ea9d4e392..c7ed893d4c340 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
@@ -17,7 +17,12 @@
 package org.apache.arrow.vector.complex.writer;

-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.math.BigDecimal;
 import java.nio.ByteBuffer;
@@ -92,10 +97,9 @@
 import org.apache.arrow.vector.util.JsonStringHashMap;
 import org.apache.arrow.vector.util.Text;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;

 public class TestComplexWriter {
@@ -103,12 +107,12 @@ public class TestComplexWriter {

   private static final int COUNT = 100;

-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Integer.MAX_VALUE);
   }

-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -119,8 +123,8 @@ public void simpleNestedTypes() {
     StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
     for (int i = 0; i < COUNT; i++) {
       rootReader.setPosition(i);
-      Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue());
-      Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue());
+      assertEquals(i, rootReader.reader("int").readInteger().intValue());
+      assertEquals(i, rootReader.reader("bigInt").readLong().longValue());
     }

     parent.close();
@@ -210,15 +214,15 @@ private void checkNullableStruct(NonNullableStructVector structVector) {
     StructReader rootReader = new SingleStructReaderImpl(structVector).reader("root");
     for (int i = 0; i < COUNT; i++) {
       rootReader.setPosition(i);
-      assertTrue("index is set: " + i, rootReader.isSet());
+      assertTrue(rootReader.isSet(), "index is set: " + i);
       FieldReader struct = rootReader.reader("struct");
       if (i % 2 == 0) {
-        assertTrue("index is set: " + i, struct.isSet());
-        assertNotNull("index is set: " + i, struct.readObject());
+        assertTrue(struct.isSet(), "index is set: " + i);
+        assertNotNull(struct.readObject(), "index is set: " + i);
         assertEquals(i, struct.reader("nested").readLong().longValue());
       } else {
-        assertFalse("index is not set: " + i, struct.isSet());
-        assertNull("index is not set: " + i, struct.readObject());
+        assertFalse(struct.isSet(), "index is not set: " + i);
+        assertNull(struct.readObject(), "index is not set: " + i);
       }
     }
   }
@@ -245,11 +249,11 @@ public void testList() {
       StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");

       rootReader.setPosition(0);
-      assertTrue("row 0 list is not set", rootReader.reader("list").isSet());
+      assertTrue(rootReader.reader("list").isSet(), "row 0 list is not set");
       assertEquals(Long.valueOf(0), rootReader.reader("list").reader().readLong());

       rootReader.setPosition(1);
-      assertFalse("row 1 list is set", rootReader.reader("list").isSet());
+      assertFalse(rootReader.reader("list").isSet(), "row 1 list is set");
     }
   }
@@ -312,9 +316,9 @@ public void testListScalarNull() {
         for (int j = 0; j < i % 7; j++) {
           listReader.next();
           if (j % 2 == 0) {
-            assertFalse("index is set: " + j, listReader.reader().isSet());
+            assertFalse(listReader.reader().isSet(), "index is set: " + j);
           } else {
-            assertTrue("index is not set: " + j, listReader.reader().isSet());
+            assertTrue(listReader.reader().isSet(), "index is not set: " + j);
             assertEquals(j, listReader.reader().readInteger().intValue());
           }
         }
@@ -392,7 +396,7 @@ public void listTimeStampMilliTZType() {
         for (int j = 0; j < i % 7; j++) {
           listReader.next();
           if (j % 2 == 0) {
-            assertFalse("index is set: " + j, listReader.reader().isSet());
+            assertFalse(listReader.reader().isSet(), "index is set: " + j);
           } else {
             NullableTimeStampMilliTZHolder actual = new NullableTimeStampMilliTZHolder();
             listReader.reader().read(actual);
@@ -430,7 +434,7 @@ public void listDurationType() {
         for (int j = 0; j < i % 7; j++) {
           listReader.next();
           if (j % 2 == 0) {
-            assertFalse("index is set: " + j, listReader.reader().isSet());
+            assertFalse(listReader.reader().isSet(), "index is set: " + j);
           } else {
             NullableDurationHolder actual = new NullableDurationHolder();
             listReader.reader().read(actual);
@@ -472,7 +476,7 @@ public void listFixedSizeBinaryType() throws Exception {
         for (int j = 0; j < i % 7; j++) {
           listReader.next();
           if (j % 2 == 0) {
-            assertFalse("index is set: " + j, listReader.reader().isSet());
+            assertFalse(listReader.reader().isSet(), "index is set: " + j);
           } else {
             NullableFixedSizeBinaryHolder actual = new NullableFixedSizeBinaryHolder();
             listReader.reader().read(actual);
@@ -505,11 +509,11 @@ public void listScalarTypeNullable() {
       for (int i = 0; i < COUNT; i++) {
         listReader.setPosition(i);
         if (i % 2 == 0) {
-          assertTrue("index is set: " + i, listReader.isSet());
-          assertEquals("correct length at: " + i, i % 7, ((List) listReader.readObject()).size());
+          assertTrue(listReader.isSet(), "index is set: " + i);
+          assertEquals(i % 7, ((List) listReader.readObject()).size(), "correct length at: " + i);
         } else {
-          assertFalse("index is not set: " + i, listReader.isSet());
-          assertNull("index is not set: " + i, listReader.readObject());
+          assertFalse(listReader.isSet(), "index is not set: " + i);
+          assertNull(listReader.readObject(), "index is not set: " + i);
        }
      }
    }
@@ -537,8 +541,8 @@ public void listStructType() {
       listReader.setPosition(i);
       for (int j = 0; j < i % 7; j++) {
         listReader.next();
-        Assert.assertEquals("record: " + i, j, listReader.reader().reader("int").readInteger().intValue());
-        Assert.assertEquals(j, listReader.reader().reader("bigInt").readLong().longValue());
+        assertEquals(j, listReader.reader().reader("int").readInteger().intValue(), "record: " + i);
+        assertEquals(j, listReader.reader().reader("bigInt").readLong().longValue());
       }
     }
   }
@@ -601,7 +605,7 @@ private void checkListOfLists(final ListVector listVector) {
       FieldReader innerListReader = listReader.reader();
       for (int k = 0; k < i % 13; k++) {
         innerListReader.next();
-        Assert.assertEquals("record: " + i, k, innerListReader.reader().readInteger().intValue());
+        assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i);
       }
     }
   }
@@ -673,9 +677,9 @@ private void checkUnionList(ListVector listVector) {
       for (int k = 0; k < i % 13; k++) {
         innerListReader.next();
         if (k % 2 == 0) {
-          Assert.assertEquals("record: " + i, k, innerListReader.reader().readInteger().intValue());
+          assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i);
         } else {
-          Assert.assertEquals("record: " + i, k, innerListReader.reader().readLong().longValue());
+          assertEquals(k, innerListReader.reader().readLong().longValue(), "record: " + i);
         }
       }
     }
@@ -724,11 +728,11 @@ private void checkListMap(ListVector listVector) {
       UnionMapReader mapReader = (UnionMapReader) listReader.reader();
       for (int k = 0; k < i % 13; k++) {
         mapReader.next();
-        Assert.assertEquals("record key: " + i, k, mapReader.key().readInteger().intValue());
+        assertEquals(k, mapReader.key().readInteger().intValue(), "record key: " + i);
         if (k % 2 == 0) {
-          Assert.assertEquals("record value: " + i, k, mapReader.value().readLong().longValue());
+          assertEquals(k, mapReader.value().readLong().longValue(), "record value: " + i);
         } else {
-          Assert.assertNull("record value: " + i, mapReader.value().readLong());
+          assertNull(mapReader.value().readLong(), "record value: " + i);
         }
       }
     }
@@ -772,24 +776,24 @@ public void simpleUnion() throws Exception {
     for (int i = 0; i < COUNT; i++) {
       unionReader.setPosition(i);
       if (i % 5 == 0) {
-        Assert.assertEquals(i, unionReader.readInteger().intValue());
+        assertEquals(i, unionReader.readInteger().intValue());
       } else if (i % 5 == 1) {
         NullableTimeStampMilliTZHolder holder = new NullableTimeStampMilliTZHolder();
         unionReader.read(holder);
-        Assert.assertEquals(i, holder.value);
-        Assert.assertEquals("AsdfTimeZone", holder.timezone);
+        assertEquals(i, holder.value);
+        assertEquals("AsdfTimeZone", holder.timezone);
       } else if (i % 5 == 2) {
         NullableDurationHolder holder = new NullableDurationHolder();
         unionReader.read(holder);
-        Assert.assertEquals(i, holder.value);
-        Assert.assertEquals(TimeUnit.NANOSECOND, holder.unit);
+        assertEquals(i, holder.value);
+        assertEquals(TimeUnit.NANOSECOND, holder.unit);
       } else if (i % 5 == 3) {
         NullableFixedSizeBinaryHolder holder = new NullableFixedSizeBinaryHolder();
         unionReader.read(holder);
         assertEquals(i, holder.buffer.getInt(0));
         assertEquals(4, holder.byteWidth);
       } else {
-        Assert.assertEquals((float) i, unionReader.readFloat(), 1e-12);
+        assertEquals((float) i, unionReader.readFloat(), 1e-12);
       }
     }
     vector.close();
@@ -808,12 +812,12 @@ public void promotableWriter() {
       bigIntWriter.writeBigInt(i);
     }
     Field field = parent.getField().getChildren().get(0).getChildren().get(0);
-    Assert.assertEquals("a", field.getName());
-    Assert.assertEquals(Int.TYPE_TYPE, field.getType().getTypeID());
+    assertEquals("a", field.getName());
+    assertEquals(Int.TYPE_TYPE, field.getType().getTypeID());
     Int intType = (Int) field.getType();
-    Assert.assertEquals(64, intType.getBitWidth());
-    Assert.assertTrue(intType.getIsSigned());
+    assertEquals(64, intType.getBitWidth());
+    assertTrue(intType.getIsSigned());
     for (int i = 100; i < 200; i++) {
       VarCharWriter varCharWriter = rootWriter.varChar("a");
       varCharWriter.setPosition(i);
@@ -824,23 +828,23 @@ public void promotableWriter() {
       tempBuf.close();
     }
     field = parent.getField().getChildren().get(0).getChildren().get(0);
-    Assert.assertEquals("a", field.getName());
-    Assert.assertEquals(Union.TYPE_TYPE, field.getType().getTypeID());
-    Assert.assertEquals(Int.TYPE_TYPE, field.getChildren().get(0).getType().getTypeID());
-    Assert.assertEquals(Utf8.TYPE_TYPE, field.getChildren().get(1).getType().getTypeID());
+    assertEquals("a", field.getName());
+    assertEquals(Union.TYPE_TYPE, field.getType().getTypeID());
+    assertEquals(Int.TYPE_TYPE, field.getChildren().get(0).getType().getTypeID());
+    assertEquals(Utf8.TYPE_TYPE, field.getChildren().get(1).getType().getTypeID());
     StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
     for (int i = 0; i < 100; i++) {
       rootReader.setPosition(i);
       FieldReader reader = rootReader.reader("a");
       Long value = reader.readLong();
-      Assert.assertNotNull("index: " + i, value);
-      Assert.assertEquals(i, value.intValue());
+      assertNotNull(value, "index: " + i);
+      assertEquals(i, value.intValue());
     }
     for (int i = 100; i < 200; i++) {
       rootReader.setPosition(i);
       FieldReader reader = rootReader.reader("a");
       Text value = reader.readText();
-      Assert.assertEquals(Integer.toString(i), value.toString());
+      assertEquals(Integer.toString(i), value.toString());
     }
   }
 }
@@ -857,14 +861,14 @@ public void promotableWriterSchema() {
       rootWriter.varChar("a");

       Field field = parent.getField().getChildren().get(0).getChildren().get(0);
-      Assert.assertEquals("a", field.getName());
-      Assert.assertEquals(ArrowTypeID.Union, field.getType().getTypeID());
+      assertEquals("a", field.getName());
+      assertEquals(ArrowTypeID.Union, field.getType().getTypeID());

-      Assert.assertEquals(ArrowTypeID.Int, field.getChildren().get(0).getType().getTypeID());
+      assertEquals(ArrowTypeID.Int, field.getChildren().get(0).getType().getTypeID());
       Int intType = (Int) field.getChildren().get(0).getType();
-      Assert.assertEquals(64, intType.getBitWidth());
-      Assert.assertTrue(intType.getIsSigned());
-      Assert.assertEquals(ArrowTypeID.Utf8, field.getChildren().get(1).getType().getTypeID());
+      assertEquals(64, intType.getBitWidth());
+      assertTrue(intType.getIsSigned());
+      assertEquals(ArrowTypeID.Utf8, field.getChildren().get(1).getType().getTypeID());
     }
   }
@@ -901,18 +905,18 @@ public void structWriterMixedCaseFieldNames() {
       List fieldsCaseSensitive = parent.getField().getChildren().get(0).getChildren();
       Set fieldNamesCaseSensitive = getFieldNames(fieldsCaseSensitive);
-      Assert.assertEquals(11, fieldNamesCaseSensitive.size());
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("int_field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("Int_Field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("float_field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("Float_Field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field::char_field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field::Char_Field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::Bit_Field"));
+      assertEquals(11, fieldNamesCaseSensitive.size());
+      assertTrue(fieldNamesCaseSensitive.contains("int_field"));
+      assertTrue(fieldNamesCaseSensitive.contains("Int_Field"));
+      assertTrue(fieldNamesCaseSensitive.contains("float_field"));
+      assertTrue(fieldNamesCaseSensitive.contains("Float_Field"));
+      assertTrue(fieldNamesCaseSensitive.contains("struct_field"));
+      assertTrue(fieldNamesCaseSensitive.contains("struct_field::char_field"));
+      assertTrue(fieldNamesCaseSensitive.contains("struct_field::Char_Field"));
+      assertTrue(fieldNamesCaseSensitive.contains("list_field"));
+      assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$"));
+      assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field"));
+      assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::Bit_Field"));

       // test case-insensitive StructWriter
       ComplexWriter writerCaseInsensitive = new ComplexWriterImpl("rootCaseInsensitive", parent, false, false);
@@ -932,14 +936,14 @@ public void structWriterMixedCaseFieldNames() {
       List fieldsCaseInsensitive = parent.getField().getChildren().get(1).getChildren();
       Set fieldNamesCaseInsensitive = getFieldNames(fieldsCaseInsensitive);
-      Assert.assertEquals(7, fieldNamesCaseInsensitive.size());
-      Assert.assertTrue(fieldNamesCaseInsensitive.contains("int_field"));
-      Assert.assertTrue(fieldNamesCaseInsensitive.contains("float_field"));
-      Assert.assertTrue(fieldNamesCaseInsensitive.contains("struct_field"));
-      Assert.assertTrue(fieldNamesCaseInsensitive.contains("struct_field::char_field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$"));
-      Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field"));
+      assertEquals(7, fieldNamesCaseInsensitive.size());
+      assertTrue(fieldNamesCaseInsensitive.contains("int_field"));
+      assertTrue(fieldNamesCaseInsensitive.contains("float_field"));
+      assertTrue(fieldNamesCaseInsensitive.contains("struct_field"));
+      assertTrue(fieldNamesCaseInsensitive.contains("struct_field::char_field"));
+      assertTrue(fieldNamesCaseSensitive.contains("list_field"));
+      assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$"));
+      assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field"));
     }
   }
@@ -976,15 +980,15 @@ public void timeStampSecWriter() throws Exception {
       {
         FieldReader secReader = rootReader.reader("sec");
         secReader.setPosition(0);
         LocalDateTime secDateTime = secReader.readLocalDateTime();
-        Assert.assertEquals(expectedSecDateTime, secDateTime);
+        assertEquals(expectedSecDateTime, secDateTime);
         long secLong = secReader.readLong();
-        Assert.assertEquals(expectedSecs, secLong);
+        assertEquals(expectedSecs, secLong);
       }
       {
         FieldReader secTZReader = rootReader.reader("secTZ");
         secTZReader.setPosition(1);
         long secTZLong = secTZReader.readLong();
-        Assert.assertEquals(expectedSecs, secTZLong);
+        assertEquals(expectedSecs, secTZLong);
       }
     }
   }
@@ -1022,27 +1026,27 @@ public void timeStampMilliWriters() throws Exception {
       {
         FieldReader milliReader = rootReader.reader("milli");
         milliReader.setPosition(0);
         LocalDateTime milliDateTime = milliReader.readLocalDateTime();
-        Assert.assertEquals(expectedMilliDateTime, milliDateTime);
+        assertEquals(expectedMilliDateTime, milliDateTime);
         long milliLong = milliReader.readLong();
-        Assert.assertEquals(expectedMillis, milliLong);
+        assertEquals(expectedMillis, milliLong);
       }
       {
         FieldReader milliTZReader = rootReader.reader("milliTZ");
         milliTZReader.setPosition(0);
         long milliTZLong = milliTZReader.readLong();
-        Assert.assertEquals(expectedMillis, milliTZLong);
+        assertEquals(expectedMillis, milliTZLong);
       }
     }
   }

   private void checkTimestampField(Field field, String name) {
-    Assert.assertEquals(name, field.getName());
-    Assert.assertEquals(ArrowType.Timestamp.TYPE_TYPE, field.getType().getTypeID());
+    assertEquals(name, field.getName());
+    assertEquals(ArrowType.Timestamp.TYPE_TYPE, field.getType().getTypeID());
   }

   private void checkTimestampTZField(Field field, String name, String tz) {
     checkTimestampField(field, name);
-    Assert.assertEquals(tz, ((Timestamp) field.getType()).getTimezone());
+    assertEquals(tz, ((Timestamp) field.getType()).getTimezone());
   }

   @Test
@@ -1079,15 +1083,15 @@ public void timeStampMicroWriters() throws Exception {
       {
         FieldReader microReader = rootReader.reader("micro");
         microReader.setPosition(0);
         LocalDateTime microDateTime = microReader.readLocalDateTime();
-        Assert.assertEquals(expectedMicroDateTime, microDateTime);
+        assertEquals(expectedMicroDateTime, microDateTime);
         long microLong = microReader.readLong();
-        Assert.assertEquals(expectedMicros, microLong);
+        assertEquals(expectedMicros, microLong);
       }
       {
         FieldReader microReader = rootReader.reader("microTZ");
         microReader.setPosition(1);
         long microLong = microReader.readLong();
-        Assert.assertEquals(expectedMicros, microLong);
+        assertEquals(expectedMicros, microLong);
       }
     }
   }
@@ -1125,18 +1129,18 @@ public void timeStampNanoWriters() throws Exception {
      {
        FieldReader nanoReader = rootReader.reader("nano");
        nanoReader.setPosition(0);
        LocalDateTime nanoDateTime = nanoReader.readLocalDateTime();
-       Assert.assertEquals(expectedNanoDateTime, nanoDateTime);
+       assertEquals(expectedNanoDateTime, nanoDateTime);
        long nanoLong = nanoReader.readLong();
-       Assert.assertEquals(expectedNanos, nanoLong);
+       assertEquals(expectedNanos, nanoLong);
      }
      {
        FieldReader nanoReader = rootReader.reader("nanoTZ");
        nanoReader.setPosition(0);
        long nanoLong = nanoReader.readLong();
-       Assert.assertEquals(expectedNanos, nanoLong);
+       assertEquals(expectedNanos, nanoLong);
        NullableTimeStampNanoTZHolder h = new NullableTimeStampNanoTZHolder();
        nanoReader.read(h);
-       Assert.assertEquals(expectedNanos, h.value);
+       assertEquals(expectedNanos, h.value);
      }
    }
@@ -1173,8 +1177,8 @@ public void fixedSizeBinaryWriters() throws Exception {
     // schema
     List children = parent.getField().getChildren().get(0).getChildren();
-    Assert.assertEquals(fieldName, children.get(0).getName());
-    Assert.assertEquals(ArrowType.FixedSizeBinary.TYPE_TYPE, children.get(0).getType().getTypeID());
+    assertEquals(fieldName, children.get(0).getName());
+    assertEquals(ArrowType.FixedSizeBinary.TYPE_TYPE, children.get(0).getType().getTypeID());

     // read
     StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
@@ -1183,7 +1187,7 @@ public void fixedSizeBinaryWriters() throws Exception {
     for (int i = 0; i < numValues; i++) {
       fixedSizeBinaryReader.setPosition(i);
       byte[] readValues = fixedSizeBinaryReader.readByteArray();
-      Assert.assertArrayEquals(values[i], readValues);
+      assertArrayEquals(values[i], readValues);
     }
   }
@@ -1369,17 +1373,17 @@ public void testListWriterWithNulls() {
       for (int i = 0; i < COUNT; i++) {
         listReader.setPosition(i);
         if (i % 2 == 0) {
-          Assert.assertTrue(listReader.isSet());
+          assertTrue(listReader.isSet());
           listReader.next();
           if (i % 4 == 0) {
-            Assert.assertNull(listReader.reader().readInteger());
+            assertNull(listReader.reader().readInteger());
           } else {
-            Assert.assertEquals(i, listReader.reader().readInteger().intValue());
+            assertEquals(i, listReader.reader().readInteger().intValue());
             listReader.next();
-            Assert.assertEquals(i * 2, listReader.reader().readInteger().intValue());
+            assertEquals(i * 2, listReader.reader().readInteger().intValue());
           }
         } else {
-          Assert.assertFalse(listReader.isSet());
+          assertFalse(listReader.isSet());
         }
       }
     }
@@ -1419,20 +1423,20 @@ public void testListOfListWriterWithNulls() {
       for (int i = 0; i < COUNT; i++) {
         listReader.setPosition(i);
         if (i % 2 == 0) {
-          Assert.assertTrue(listReader.isSet());
+          assertTrue(listReader.isSet());
           listReader.next();
           if (i % 4 == 0) {
-            Assert.assertFalse(listReader.reader().isSet());
+            assertFalse(listReader.reader().isSet());
           } else {
             listReader.reader().next();
-            Assert.assertFalse(listReader.reader().reader().isSet());
+            assertFalse(listReader.reader().reader().isSet());
             listReader.reader().next();
-            Assert.assertEquals(i, listReader.reader().reader().readInteger().intValue());
+            assertEquals(i, listReader.reader().reader().readInteger().intValue());
             listReader.reader().next();
-            Assert.assertEquals(i * 2, listReader.reader().reader().readInteger().intValue());
+            assertEquals(i * 2, listReader.reader().reader().readInteger().intValue());
           }
         } else {
-          Assert.assertFalse(listReader.isSet());
+          assertFalse(listReader.isSet());
         }
       }
     }
@@ -1478,23 +1482,23 @@ public void testListOfListOfListWriterWithNulls() {
       for (int i = 0; i < COUNT; i++) {
         listReader.setPosition(i);
         if (i % 4 == 0) {
-          Assert.assertFalse(listReader.isSet());
+          assertFalse(listReader.isSet());
         } else {
-          Assert.assertTrue(listReader.isSet());
+          assertTrue(listReader.isSet());
           listReader.next();
           if (i % 4 == 1) {
-            Assert.assertFalse(listReader.reader().isSet());
+            assertFalse(listReader.reader().isSet());
           } else if (i % 4 == 2) {
             listReader.reader().next();
-            Assert.assertFalse(listReader.reader().reader().isSet());
+            assertFalse(listReader.reader().reader().isSet());
           } else {
             listReader.reader().next();
             listReader.reader().reader().next();
-            Assert.assertFalse(listReader.reader().reader().reader().isSet());
+            assertFalse(listReader.reader().reader().reader().isSet());
             listReader.reader().reader().next();
-            Assert.assertEquals(i, listReader.reader().reader().reader().readInteger().intValue());
+            assertEquals(i, listReader.reader().reader().reader().readInteger().intValue());
             listReader.reader().reader().next();
-            Assert.assertEquals(i * 2, listReader.reader().reader().reader().readInteger().intValue());
+            assertEquals(i * 2, listReader.reader().reader().reader().readInteger().intValue());
           }
         }
       }
@@ -1507,7 +1511,7 @@ public void testStructOfList() {
       structVector.addOrGetList("childList1");
       NullableStructReaderImpl structReader = structVector.getReader();
       FieldReader childListReader = structReader.reader("childList1");
-      Assert.assertNotNull(childListReader);
+      assertNotNull(childListReader);
     }

     try (StructVector structVector = StructVector.empty("struct2", allocator)) {
@@ -1523,9 +1527,9 @@ public void testStructOfList() {
       NullableStructReaderImpl structReader = structVector.getReader();
       FieldReader childListReader = structReader.reader("childList2");
       int size = childListReader.size();
-      Assert.assertEquals(1, size);
+      assertEquals(1, size);
       int data = childListReader.reader().readInteger();
-      Assert.assertEquals(10, data);
+      assertEquals(10, data);
     }

     try (StructVector structVector = StructVector.empty("struct3", allocator)) {
@@ -1545,9 +1549,9 @@ public void testStructOfList() {
       structReader.setPosition(3);
       FieldReader childListReader = structReader.reader("childList3");
       int size = childListReader.size();
-      Assert.assertEquals(1, size);
+      assertEquals(1, size);
       int data = ((List) childListReader.readObject()).get(0);
-      Assert.assertEquals(3, data);
+      assertEquals(3, data);
     }

     try (StructVector structVector = StructVector.empty("struct4", allocator)) {
@@ -1564,7 +1568,7 @@ public void testStructOfList() {
       structReader.setPosition(3);
       FieldReader childListReader = structReader.reader("childList4");
       int size = childListReader.size();
-      Assert.assertEquals(0, size);
+      assertEquals(0, size);
     }
   }
@@ -1618,7 +1622,7 @@ public void testMapWithNulls() {
       mapWriter.endMap();
       writer.setValueCount(1);
       UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root");
-      Assert.assertNull(mapReader.key().readInteger());
+      assertNull(mapReader.key().readInteger());
       assertEquals(1, mapReader.value().readInteger().intValue());
     }
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java
index 27b8f1796ee31..f17c370c89522 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java
@@ -17,6 +17,9 @@
 package org.apache.arrow.vector.complex.writer;

+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 import java.nio.ByteBuffer;

 import org.apache.arrow.memory.BufferAllocator;
@@ -30,21 +33,20 @@
 import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl;
 import org.apache.arrow.vector.complex.impl.VarCharWriterImpl;
 import org.apache.arrow.vector.util.Text;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;

 public class TestSimpleWriter {

   private BufferAllocator allocator;

-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Integer.MAX_VALUE);
   }

-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -56,7 +58,7 @@ public void testWriteByteArrayToVarBinary() throws Exception {
       byte[] input = new byte[] { 0x01, 0x02 };
       writer.writeVarBinary(input);
       byte[] result = vector.get(0);
-      Assert.assertArrayEquals(input, result);
+      assertArrayEquals(input, result);
     }
   }
@@ -67,7 +69,7 @@ public void testWriteByteArrayWithOffsetToVarBinary() throws Exception {
       byte[] input = new byte[] { 0x01, 0x02 };
       writer.writeVarBinary(input, 1, 1);
       byte[] result = vector.get(0);
-      Assert.assertArrayEquals(new byte[] { 0x02 }, result);
+      assertArrayEquals(new byte[] { 0x02 }, result);
     }
   }
@@ -79,7 +81,7 @@ public void testWriteByteBufferToVarBinary() throws Exception {
       ByteBuffer buffer = ByteBuffer.wrap(input);
       writer.writeVarBinary(buffer);
       byte[] result = vector.get(0);
-      Assert.assertArrayEquals(input, result);
+      assertArrayEquals(input, result);
     }
   }
@@ -91,7 +93,7 @@ public void testWriteByteBufferWithOffsetToVarBinary() throws Exception {
       ByteBuffer buffer = ByteBuffer.wrap(input);
       writer.writeVarBinary(buffer, 1, 1);
       byte[] result = vector.get(0);
-      Assert.assertArrayEquals(new byte[] { 0x02 }, result);
+      assertArrayEquals(new byte[] { 0x02 }, result);
     }
   }
@@ -102,7 +104,7 @@ public void testWriteByteArrayToLargeVarBinary() throws Exception {
       byte[] input = new byte[] { 0x01, 0x02 };
       writer.writeLargeVarBinary(input);
       byte[] result = vector.get(0);
-      Assert.assertArrayEquals(input, result);
+      assertArrayEquals(input, result);
     }
   }
@@ -113,7 +115,7 @@ public void testWriteByteArrayWithOffsetToLargeVarBinary() throws Exception {
       byte[] input = new byte[] { 0x01, 0x02 };
       writer.writeLargeVarBinary(input, 1, 1);
       byte[] result = vector.get(0);
-      Assert.assertArrayEquals(new byte[] { 0x02 }, result);
+      assertArrayEquals(new byte[] { 0x02 }, result);
     }
   }
@@ -125,7 +127,7 @@ public void testWriteByteBufferToLargeVarBinary() throws Exception {
       ByteBuffer buffer = ByteBuffer.wrap(input);
       writer.writeLargeVarBinary(buffer);
       byte[] result = vector.get(0);
-      Assert.assertArrayEquals(input, result);
+      assertArrayEquals(input, result);
     }
   }
@@ -137,7 +139,7 @@ public void testWriteByteBufferWithOffsetToLargeVarBinary() throws Exception {
       ByteBuffer buffer = ByteBuffer.wrap(input);
       writer.writeLargeVarBinary(buffer, 1, 1);
       byte[] result = vector.get(0);
-      Assert.assertArrayEquals(new byte[] { 0x02 }, result);
+      assertArrayEquals(new byte[] { 0x02 }, result);
     }
   }
@@ -148,7 +150,7 @@ public void testWriteStringToVarChar() throws Exception {
       String input = "testInput";
       writer.writeVarChar(input);
       String result = vector.getObject(0).toString();
-      Assert.assertEquals(input, result);
+      assertEquals(input, result);
     }
   }
@@ -159,7 +161,7 @@ public void testWriteTextToVarChar() throws Exception {
       String input = "testInput";
       writer.writeVarChar(new Text(input));
       String result = vector.getObject(0).toString();
-      Assert.assertEquals(input, result);
+      assertEquals(input, result);
     }
   }
@@ -170,7 +172,7 @@ public void testWriteStringToLargeVarChar() throws Exception {
       String input = "testInput";
       writer.writeLargeVarChar(input);
       String result = vector.getObject(0).toString();
-      Assert.assertEquals(input, result);
+      assertEquals(input, result);
     }
   }
@@ -181,7 +183,7 @@ public void testWriteTextToLargeVarChar() throws Exception {
       String input = "testInput";
       writer.writeLargeVarChar(new Text(input));
       String result = vector.getObject(0).toString();
-      Assert.assertEquals(input, result);
+      assertEquals(input, result);
     }
   }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
index de9187edb667e..77eeb3589058d 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
@@ -18,9 +18,12 @@
 package org.apache.arrow.vector.ipc;

 import static org.apache.arrow.vector.TestUtils.newVarCharVector;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.io.IOException;
 import java.math.BigDecimal;
@@ -84,9 +87,8 @@
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.JsonStringArrayList;
 import org.apache.arrow.vector.util.Text;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -98,12 +100,12 @@ public class BaseFileTest {
   protected static final int COUNT = 10;
   protected BufferAllocator allocator;

-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Integer.MAX_VALUE);
   }

-  @After
+  @AfterEach
   public void tearDown() {
     allocator.close();
   }
@@ -150,17 +152,20 @@ protected void writeData(int count, StructVector parent) {
   protected void validateContent(int count, VectorSchemaRoot root) {
     for (int i = 0; i < count; i++) {
-      Assert.assertEquals(i, root.getVector("int").getObject(i));
-      Assert.assertEquals((Short) uint1Values[i % uint1Values.length],
+      assertEquals(i, root.getVector("int").getObject(i));
+      assertEquals((Short) uint1Values[i % uint1Values.length],
           ((UInt1Vector) root.getVector("uint1")).getObjectNoOverflow(i));
-      Assert.assertEquals("Failed for index: " + i, (Character) uint2Values[i % uint2Values.length],
-          (Character) ((UInt2Vector) root.getVector("uint2")).get(i));
-      Assert.assertEquals("Failed for index: " + i, (Long) uint4Values[i % uint4Values.length],
-          ((UInt4Vector) root.getVector("uint4")).getObjectNoOverflow(i));
-      Assert.assertEquals("Failed for index: " + i, uint8Values[i % uint8Values.length],
-          ((UInt8Vector) root.getVector("uint8")).getObjectNoOverflow(i));
-      Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
-      Assert.assertEquals(i == 0 ? Float.NaN : i, root.getVector("float").getObject(i));
+      assertEquals((Character) uint2Values[i % uint2Values.length],
+          (Character) ((UInt2Vector) root.getVector("uint2")).get(i),
+          "Failed for index: " + i);
+      assertEquals((Long) uint4Values[i % uint4Values.length],
+          ((UInt4Vector) root.getVector("uint4")).getObjectNoOverflow(i),
+          "Failed for index: " + i);
+      assertEquals(uint8Values[i % uint8Values.length],
+          ((UInt8Vector) root.getVector("uint8")).getObjectNoOverflow(i),
+          "Failed for index: " + i);
+      assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
+      assertEquals(i == 0 ? Float.NaN : i, root.getVector("float").getObject(i));
     }
   }
@@ -210,23 +215,23 @@ public void printVectors(List vectors) {
   }

   protected void validateComplexContent(int count, VectorSchemaRoot root) {
-    Assert.assertEquals(count, root.getRowCount());
+    assertEquals(count, root.getRowCount());
     printVectors(root.getFieldVectors());
     for (int i = 0; i < count; i++) {
       Object intVal = root.getVector("int").getObject(i);
       if (i % 5 != 3) {
-        Assert.assertEquals(i, intVal);
+        assertEquals(i, intVal);
       } else {
-        Assert.assertNull(intVal);
+        assertNull(intVal);
       }
-      Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
-      Assert.assertEquals(i % 3, ((List) root.getVector("list").getObject(i)).size());
+      assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
+      assertEquals(i % 3, ((List) root.getVector("list").getObject(i)).size());
       NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder();
       FieldReader structReader = root.getVector("struct").getReader();
       structReader.setPosition(i);
       structReader.reader("timestamp").read(h);
-      Assert.assertEquals(i, h.value);
+      assertEquals(i, h.value);
     }
   }
@@ -235,7 +240,7 @@ private LocalDateTime makeDateTimeFromCount(int i) {
   }

   protected void writeDateTimeData(int count, StructVector parent) {
-    Assert.assertTrue(count < 100);
+    assertTrue(count < 100);
     ComplexWriter writer = new ComplexWriterImpl("root", parent);
     StructWriter rootWriter = writer.rootAsStruct();
     DateMilliWriter dateWriter = rootWriter.dateMilli("date");
@@ -268,22 +273,22 @@ protected void writeDateTimeData(int count, StructVector parent) {
   }

   protected void validateDateTimeContent(int count, VectorSchemaRoot root) {
-    Assert.assertEquals(count, root.getRowCount());
+    assertEquals(count, root.getRowCount());
     printVectors(root.getFieldVectors());
     for (int i = 0; i < count; i++) {
       LocalDateTime dt = makeDateTimeFromCount(i);
       LocalDateTime dtMilli = dt.minusNanos(i);
       LocalDateTime dateVal = ((DateMilliVector) root.getVector("date")).getObject(i);
       LocalDateTime dateExpected = dt.toLocalDate().atStartOfDay();
-      Assert.assertEquals(dateExpected, dateVal);
+      assertEquals(dateExpected, dateVal);
       LocalTime timeVal = ((TimeMilliVector) root.getVector("time")).getObject(i).toLocalTime();
-      Assert.assertEquals(dtMilli.toLocalTime(), timeVal);
+      assertEquals(dtMilli.toLocalTime(), timeVal);
       Object timestampMilliVal = root.getVector("timestamp-milli").getObject(i);
-      Assert.assertEquals(dtMilli, timestampMilliVal);
+      assertEquals(dtMilli, timestampMilliVal);
       Object timestampMilliTZVal = root.getVector("timestamp-milliTZ").getObject(i);
-      Assert.assertEquals(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal);
+      assertEquals(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal);
       Object timestampNanoVal = root.getVector("timestamp-nano").getObject(i);
-      Assert.assertEquals(dt, timestampNanoVal);
+      assertEquals(dt, timestampNanoVal);
     }
   }
@@ -355,66 +360,66 @@ protected VectorSchemaRoot writeFlatDictionaryData(

   protected void validateFlatDictionary(VectorSchemaRoot root, DictionaryProvider provider) {
     FieldVector vector1A = root.getVector("varcharA");
-    Assert.assertNotNull(vector1A);
+    assertNotNull(vector1A);

     DictionaryEncoding encoding1A = vector1A.getField().getDictionary();
-    Assert.assertNotNull(encoding1A);
-    Assert.assertEquals(1L, encoding1A.getId());
+    assertNotNull(encoding1A);
+    assertEquals(1L, encoding1A.getId());

-    Assert.assertEquals(6, vector1A.getValueCount());
-    Assert.assertEquals(0, vector1A.getObject(0));
-    Assert.assertEquals(1, vector1A.getObject(1));
-    Assert.assertEquals(null, vector1A.getObject(2));
-    Assert.assertEquals(2, vector1A.getObject(3));
-    Assert.assertEquals(1, vector1A.getObject(4));
-    Assert.assertEquals(2, vector1A.getObject(5));
+    assertEquals(6, vector1A.getValueCount());
+    assertEquals(0, vector1A.getObject(0));
+    assertEquals(1, vector1A.getObject(1));
+    assertEquals(null, vector1A.getObject(2));
+    assertEquals(2, vector1A.getObject(3));
+    assertEquals(1, vector1A.getObject(4));
+    assertEquals(2, vector1A.getObject(5));

     FieldVector vector1B = root.getVector("varcharB");
-    Assert.assertNotNull(vector1B);
+    assertNotNull(vector1B);

     DictionaryEncoding encoding1B = vector1A.getField().getDictionary();
-    Assert.assertNotNull(encoding1B);
-    Assert.assertTrue(encoding1A.equals(encoding1B));
-    Assert.assertEquals(1L, encoding1B.getId());
-
-    Assert.assertEquals(6, vector1B.getValueCount());
-    Assert.assertEquals(2, vector1B.getObject(0));
-    Assert.assertEquals(1, vector1B.getObject(1));
-    Assert.assertEquals(2, vector1B.getObject(2));
-    Assert.assertEquals(null, vector1B.getObject(3));
-    Assert.assertEquals(1, vector1B.getObject(4));
-    Assert.assertEquals(0, vector1B.getObject(5));
+    assertNotNull(encoding1B);
+    assertTrue(encoding1A.equals(encoding1B));
+    assertEquals(1L, encoding1B.getId());
+
+    assertEquals(6, vector1B.getValueCount());
+    assertEquals(2, vector1B.getObject(0));
+    assertEquals(1, vector1B.getObject(1));
+    assertEquals(2, vector1B.getObject(2));
+    assertEquals(null, vector1B.getObject(3));
+    assertEquals(1, vector1B.getObject(4));
+    assertEquals(0, vector1B.getObject(5));

     FieldVector vector2 = root.getVector("sizes");
-    Assert.assertNotNull(vector2);
+    assertNotNull(vector2);

     DictionaryEncoding encoding2 = vector2.getField().getDictionary();
-    Assert.assertNotNull(encoding2);
-    Assert.assertEquals(2L, encoding2.getId());
+    assertNotNull(encoding2);
+    assertEquals(2L, encoding2.getId());

-    Assert.assertEquals(6, vector2.getValueCount());
-    Assert.assertEquals(null, vector2.getObject(0));
-    Assert.assertEquals(2, vector2.getObject(1));
-    Assert.assertEquals(1, vector2.getObject(2));
-    Assert.assertEquals(1, vector2.getObject(3));
-    Assert.assertEquals(2, vector2.getObject(4));
-    Assert.assertEquals(null, vector2.getObject(5));
+    assertEquals(6, vector2.getValueCount());
+    assertEquals(null, vector2.getObject(0));
+    assertEquals(2, vector2.getObject(1));
+    assertEquals(1, vector2.getObject(2));
+    assertEquals(1, vector2.getObject(3));
+    assertEquals(2, vector2.getObject(4));
+    assertEquals(null, vector2.getObject(5));

     Dictionary dictionary1 = provider.lookup(1L);
-    Assert.assertNotNull(dictionary1);
+    assertNotNull(dictionary1);
     VarCharVector dictionaryVector = ((VarCharVector) dictionary1.getVector());
-    Assert.assertEquals(3, dictionaryVector.getValueCount());
-    Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0));
-    Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1));
-    Assert.assertEquals(new Text("baz"), dictionaryVector.getObject(2));
+    assertEquals(3, dictionaryVector.getValueCount());
+    assertEquals(new Text("foo"), dictionaryVector.getObject(0));
+    assertEquals(new Text("bar"), dictionaryVector.getObject(1));
+    assertEquals(new Text("baz"), dictionaryVector.getObject(2));

     Dictionary dictionary2 = provider.lookup(2L);
-    Assert.assertNotNull(dictionary2);
+    assertNotNull(dictionary2);
     dictionaryVector = ((VarCharVector) dictionary2.getVector());
-    Assert.assertEquals(3, dictionaryVector.getValueCount());
-    Assert.assertEquals(new Text("micro"), dictionaryVector.getObject(0));
-    Assert.assertEquals(new Text("small"), dictionaryVector.getObject(1));
-    Assert.assertEquals(new Text("large"), dictionaryVector.getObject(2));
+    assertEquals(3, dictionaryVector.getValueCount());
+    assertEquals(new Text("micro"), dictionaryVector.getObject(0));
+    assertEquals(new Text("small"), dictionaryVector.getObject(1));
+    assertEquals(new Text("large"), dictionaryVector.getObject(2));
   }

   protected VectorSchemaRoot writeNestedDictionaryData(
@@ -456,26 +461,26 @@ protected VectorSchemaRoot writeNestedDictionaryData(

   protected void validateNestedDictionary(VectorSchemaRoot root, DictionaryProvider provider) {
     FieldVector vector = root.getFieldVectors().get(0);
-    Assert.assertNotNull(vector);
-    Assert.assertNull(vector.getField().getDictionary());
+    assertNotNull(vector);
+    assertNull(vector.getField().getDictionary());
     Field nestedField = vector.getField().getChildren().get(0);

     DictionaryEncoding encoding = nestedField.getDictionary();
-    Assert.assertNotNull(encoding);
-    Assert.assertEquals(2L, encoding.getId());
-    Assert.assertEquals(new ArrowType.Int(32, true), encoding.getIndexType());
+    assertNotNull(encoding);
+    assertEquals(2L, encoding.getId());
+    assertEquals(new ArrowType.Int(32, true), encoding.getIndexType());

-    Assert.assertEquals(3, vector.getValueCount());
-    Assert.assertEquals(Arrays.asList(0, 1), vector.getObject(0));
-    Assert.assertEquals(Arrays.asList(0), vector.getObject(1));
-    Assert.assertEquals(Arrays.asList(1), vector.getObject(2));
+    assertEquals(3, vector.getValueCount());
+    assertEquals(Arrays.asList(0, 1), vector.getObject(0));
+    assertEquals(Arrays.asList(0), vector.getObject(1));
+    assertEquals(Arrays.asList(1), vector.getObject(2));

     Dictionary dictionary = provider.lookup(2L);
-    Assert.assertNotNull(dictionary);
+    assertNotNull(dictionary);
     VarCharVector dictionaryVector = ((VarCharVector) dictionary.getVector());
-    Assert.assertEquals(2, dictionaryVector.getValueCount());
-    Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0));
-    Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1));
+    assertEquals(2, dictionaryVector.getValueCount());
+    assertEquals(new Text("foo"), dictionaryVector.getObject(0));
+    assertEquals(new Text("bar"), dictionaryVector.getObject(1));
   }

   protected VectorSchemaRoot writeDecimalData(BufferAllocator bufferAllocator) {
@@ -509,26 +514,26 @@ protected void validateDecimalData(VectorSchemaRoot root) {
     DecimalVector decimalVector2 = (DecimalVector) root.getVector("decimal2");
     DecimalVector decimalVector3 = (DecimalVector) root.getVector("decimal3");
     int count = 10;
-    Assert.assertEquals(count, root.getRowCount());
+    assertEquals(count, root.getRowCount());

     for (int i = 0; i < count; i++) {
       // Verify decimal 1 vector
       BigDecimal readValue = decimalVector1.getObject(i);
       ArrowType.Decimal type = (ArrowType.Decimal) decimalVector1.getField().getType();
       BigDecimal genValue = new BigDecimal(BigInteger.valueOf(i), type.getScale());
-      Assert.assertEquals(genValue, readValue);
+      assertEquals(genValue, readValue);

       // Verify decimal 2 vector
       readValue = decimalVector2.getObject(i);
       type = (ArrowType.Decimal) decimalVector2.getField().getType();
       genValue = new BigDecimal(BigInteger.valueOf(i * (1 << 10)), type.getScale());
-      Assert.assertEquals(genValue, readValue);
+      assertEquals(genValue, readValue);

       // Verify decimal 3 vector
       readValue = decimalVector3.getObject(i);
       type = (ArrowType.Decimal) decimalVector3.getField().getType();
       genValue = new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), type.getScale());
-      Assert.assertEquals(genValue, readValue);
+      assertEquals(genValue, readValue);
     }
   }
@@ -558,18 +563,18 @@ public void validateUnionData(int count, VectorSchemaRoot root) {
       unionReader.setPosition(i);
       switch (i % 4) {
         case 0:
-          Assert.assertEquals(i, unionReader.readInteger().intValue());
+          assertEquals(i, unionReader.readInteger().intValue());
           break;
         case 1:
-          Assert.assertEquals(i, unionReader.readLong().longValue());
+          assertEquals(i, unionReader.readLong().longValue());
           break;
         case 2:
-          Assert.assertEquals(i % 3, unionReader.size());
+          assertEquals(i % 3, unionReader.size());
           break;
         case 3:
           NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder();
           unionReader.reader("timestamp").read(h);
-          Assert.assertEquals(i, h.value);
+          assertEquals(i, h.value);
           break;
         default:
           assert false : "Unexpected value in switch statement: " + i;
@@ -623,7 +628,7 @@ public void writeUnionData(int count, StructVector parent) {
   }

   protected void writeVarBinaryData(int count, StructVector parent) {
-    Assert.assertTrue(count < 100);
+    assertTrue(count < 100);
     ComplexWriter writer = new ComplexWriterImpl("root", parent);
     StructWriter rootWriter = writer.rootAsStruct();
     ListWriter listWriter = rootWriter.list("list");
@@ -642,7 +647,7 @@ protected void writeVarBinaryData(int count, StructVector parent) {
   }

   protected void validateVarBinary(int count, VectorSchemaRoot root) {
-    Assert.assertEquals(count, root.getRowCount());
+    assertEquals(count, root.getRowCount());
     ListVector listVector = (ListVector) root.getVector("list");
     byte[] expectedArray = new byte[count];
     int numVarBinaryValues = 0;
@@ -650,23 +655,23 @@ protected void validateVarBinary(int count, VectorSchemaRoot root) {
       expectedArray[i] = (byte) i;
       List objList = listVector.getObject(i);
       if (i % 3 == 0) {
-        Assert.assertTrue(objList.isEmpty());
+        assertTrue(objList.isEmpty());
       } else {
         byte[] expected = Arrays.copyOfRange(expectedArray, 0, i + 1);
         for (int j = 0; j < i % 3; j++) {
           byte[] result = (byte[]) objList.get(j);
-          Assert.assertArrayEquals(result, expected);
+          assertArrayEquals(result, expected);
           numVarBinaryValues++;
         }
       }
     }

     // ListVector lastSet should be the index of last value + 1
-    Assert.assertEquals(listVector.getLastSet(), count - 1);
+    assertEquals(listVector.getLastSet(), count - 1);

     // VarBinaryVector lastSet should be the index of last value
     VarBinaryVector binaryVector = (VarBinaryVector) listVector.getChildrenFromFields().get(0);
-    Assert.assertEquals(binaryVector.getLastSet(), numVarBinaryValues - 1);
+    assertEquals(binaryVector.getLastSet(), numVarBinaryValues - 1);
   }

   protected void writeBatchData(ArrowWriter writer, IntVector vector, VectorSchemaRoot root) throws IOException {
@@ -762,7 +767,7 @@ protected void validateMapData(VectorSchemaRoot root) {
     MapVector sortedMapVector = (MapVector) root.getVector("mapSorted");

     final int count = 10;
-    Assert.assertEquals(count, root.getRowCount());
+    assertEquals(count, root.getRowCount());

     UnionMapReader mapReader = new UnionMapReader(mapVector);
     UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector);
@@ -833,7 +838,7 @@ protected void validateListAsMapData(VectorSchemaRoot root) {
     MapVector sortedMapVector = (MapVector) root.getVector("map");

     final int count = 10;
-    Assert.assertEquals(count, root.getRowCount());
+    assertEquals(count, root.getRowCount());

     UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector);
     sortedMapReader.setKeyValueNames("myKey", "myValue");
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java
index d3c91fd144356..52d093ae29ebf 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java
@@ -17,10 +17,10 @@
 package org.apache.arrow.vector.ipc;

-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.io.File;
 import java.io.FileInputStream;
@@ -40,7 +40,7 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java
index 79a4b249a8a89..d5120b70d01e9 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java
@@ -19,10 +19,10 @@
 import static java.util.Arrays.asList;
 import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -48,9 +48,7 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
+import org.junit.jupiter.api.Test;

 public class MessageSerializerTest {
@@ -154,9 +152,6 @@ public void testSchemaDictionaryMessageSerialization() throws IOException {
     assertEquals(schema, deserialized);
   }

-  @Rule
-  public ExpectedException expectedEx = ExpectedException.none();
-
   @Test
   public void testSerializeRecordBatchV4() throws IOException {
     byte[] validity = new byte[]{(byte) 255, 0};
@@ -243,5 +238,4 @@ public static void verifyBatch(ArrowRecordBatch batch, byte[] validity, byte[] v
     assertArrayEquals(validity, MessageSerializerTest.array(buffers.get(0)));
     assertArrayEquals(values, MessageSerializerTest.array(buffers.get(1)));
   }
-
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java
index 4fb5822786083..d76e5263122fe 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java
@@ -19,8 +19,8 @@
 import static java.nio.channels.Channels.newChannel;
 import static org.apache.arrow.vector.TestUtils.newVarCharVector;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -39,7 +39,7 @@
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.complex.StructVector;
 import org.apache.arrow.vector.types.pojo.Field;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java
index 38c65bddeddea..beb6500ac2ca0 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java
@@ -18,7 +18,7 @@
 package org.apache.arrow.vector.ipc;

 import static java.util.Arrays.asList;
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;

 import java.nio.ByteBuffer;
 import java.util.ArrayList;
@@ -32,7 +32,7 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;

 import com.google.flatbuffers.FlatBufferBuilder;
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
index 07875b25029ea..ad9ca50a14979 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
@@ -23,11 +23,12 @@
 import static org.apache.arrow.vector.TestUtils.newVarCharVector;
 import static org.apache.arrow.vector.TestUtils.newVector;
 import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -87,10 +88,9 @@
 import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
 import org.apache.arrow.vector.util.DictionaryUtility;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;

 public class TestArrowReaderWriter {
@@ -109,7 +109,7 @@ public class TestArrowReaderWriter {
   private Schema schema;
   private Schema encodedSchema;

-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Long.MAX_VALUE);
@@ -150,7 +150,7 @@ public void init() {
         new DictionaryEncoding(/*id=*/3L, /*ordered=*/false, /*indexType=*/null));
   }

-  @After
+  @AfterEach
   public void terminate() throws Exception {
     dictionaryVector1.close();
     dictionaryVector2.close();
@@ -386,18 +386,17 @@ public void testWriteReadWithStructDictionaries() throws IOException {
         assertEquals(dictionaryVector4.getValueCount(), readDictionaryVector.getValueCount());
         final BiFunction typeComparatorIgnoreName =
             (v1, v2) -> new TypeEqualsVisitor(v1, false, true).equals(v2);
-        assertTrue("Dictionary vectors are not equal",
-            new RangeEqualsVisitor(dictionaryVector4, readDictionaryVector,
-                typeComparatorIgnoreName)
-                .rangeEquals(new Range(0, 0, dictionaryVector4.getValueCount())));
+        assertTrue(new RangeEqualsVisitor(dictionaryVector4, readDictionaryVector, typeComparatorIgnoreName)
+            .rangeEquals(new Range(0, 0, dictionaryVector4.getValueCount())),
+            "Dictionary vectors are not equal");

         // Assert the decoded vector is correct
         try (final ValueVector readVector = DictionaryEncoder.decode(readEncoded, readDictionary)) {
           assertEquals(vector.getValueCount(), readVector.getValueCount());
-          assertTrue("Decoded vectors are not equal",
-              new RangeEqualsVisitor(vector, readVector, typeComparatorIgnoreName)
-                  .rangeEquals(new Range(0, 0, vector.getValueCount())));
+          assertTrue(new RangeEqualsVisitor(vector, readVector, typeComparatorIgnoreName)
+              .rangeEquals(new Range(0, 0, vector.getValueCount())),
+              "Decoded vectors are not equal");
         }
       }
     }
@@ -986,7 +985,7 @@ public void testFileFooterSizeOverflow() {
     System.arraycopy(magicBytes, 0, data, footerOffset + 4, ArrowMagic.MAGIC_LENGTH);

     // test file reader
-    InvalidArrowFileException e = Assertions.assertThrows(InvalidArrowFileException.class, () -> {
+    InvalidArrowFileException e = assertThrows(InvalidArrowFileException.class, () -> {
       try (SeekableReadChannel channel = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(data));
           ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
         reader.getVectorSchemaRoot().getSchema();
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java
index 145bdd588e945..7f3541252772f 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java
@@ -17,9 +17,9 @@
 package org.apache.arrow.vector.ipc;

-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -31,8 +31,7 @@
 import org.apache.arrow.vector.TinyIntVector;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.types.pojo.Schema;
-import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;

 public class TestArrowStream extends BaseFileTest {
   @Test
@@ -44,15 +43,15 @@ public void testEmptyStream() throws IOException {
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out);
     writer.close();
-    Assert.assertTrue(out.size() > 0);
+    assertTrue(out.size() > 0);
assertTrue(out.size() > 0); ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator)) { assertEquals(schema, reader.getVectorSchemaRoot().getSchema()); // Empty should return false - Assert.assertFalse(reader.loadNextBatch()); + assertFalse(reader.loadNextBatch()); assertEquals(0, reader.getVectorSchemaRoot().getRowCount()); - Assert.assertFalse(reader.loadNextBatch()); + assertFalse(reader.loadNextBatch()); assertEquals(0, reader.getVectorSchemaRoot().getRowCount()); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java index 422a63f57f7d8..4ba11fb05ff5d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java @@ -17,8 +17,9 @@ package org.apache.arrow.vector.ipc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.nio.channels.Pipe; @@ -33,8 +34,7 @@ import org.apache.arrow.vector.ipc.ArrowStreamWriter; import org.apache.arrow.vector.ipc.MessageSerializerTest; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestArrowStreamPipe { Schema schema = MessageSerializerTest.testSchema(); @@ -75,7 +75,7 @@ public void run() { root.close(); } catch (IOException e) { e.printStackTrace(); - Assert.fail(e.toString()); // have to explicitly fail since we're in a separate thread + fail(e.toString()); // have to explicitly fail since we're in a separate thread } } @@ -103,14 +103,14 @@ public boolean loadNextBatch() throws IOException { return false; } VectorSchemaRoot root = getVectorSchemaRoot(); - Assert.assertEquals(16, root.getRowCount()); + assertEquals(16, root.getRowCount()); TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0); - Assert.assertEquals((byte) (batchesRead - 1), vector.get(0)); + assertEquals((byte) (batchesRead - 1), vector.get(0)); for (int i = 1; i < 16; i++) { if (i < 8) { - Assert.assertEquals((byte) (i + 1), vector.get(i)); + assertEquals((byte) (i + 1), vector.get(i)); } else { - Assert.assertTrue(vector.isNull(i)); + assertTrue(vector.isNull(i)); } } @@ -129,7 +129,7 @@ public void run() { reader.close(); } catch (IOException e) { e.printStackTrace(); - Assert.fail(e.toString()); // have to explicitly fail since we're in a separate thread + fail(e.toString()); // have to explicitly fail since we're in a separate thread } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java index bd5bd4feabbd4..a90b97310a1cf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.ipc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; 
+import static org.junit.jupiter.api.Assertions.assertNull;
 
 import java.io.File;
 import java.io.IOException;
@@ -43,8 +43,7 @@
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.apache.arrow.vector.util.Validator;
-import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -342,7 +341,7 @@ public void testSetStructLength() throws IOException {
       // initialize vectors
       try (VectorSchemaRoot root = reader.read();) {
         FieldVector vector = root.getVector("struct_nullable");
-        Assert.assertEquals(7, vector.getValueCount());
+        assertEquals(7, vector.getValueCount());
       }
     }
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java
index 5f57e90f6ba19..d1a3a6db0da44 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java
@@ -18,12 +18,13 @@
 package org.apache.arrow.vector.ipc;
 
 import static org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -33,14 +34,13 @@
 import java.io.IOException;
 import java.nio.channels.Channels;
 import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.BiConsumer;
+import java.util.stream.Stream;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
@@ -68,55 +68,47 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
-import org.junit.AfterClass;
-import org.junit.Assume;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-@RunWith(Parameterized.class)
 public class TestRoundTrip extends BaseFileTest {
   private static final Logger LOGGER = LoggerFactory.getLogger(TestRoundTrip.class);
   private static BufferAllocator allocator;
-  private final String name;
-  private final IpcOption writeOption;
-  public TestRoundTrip(String name, IpcOption writeOption) {
-    this.name = name;
-    this.writeOption = writeOption;
-  }
-
-  @Parameterized.Parameters(name = "options = {0}")
-  public static Collection<Object[]> getWriteOption() {
+  static Stream<Object[]> getWriteOption() {
     final IpcOption legacy = new IpcOption(true, MetadataVersion.V4);
     final IpcOption version4 = new IpcOption(false, MetadataVersion.V4);
-    return Arrays.asList(
+    return Stream.of(
         new Object[] {"V4Legacy", legacy},
         new Object[] {"V4", version4},
         new Object[] {"V5", IpcOption.DEFAULT}
     );
   }
 
-  @BeforeClass
+  @BeforeAll
   public static void setUpClass() {
     allocator = new RootAllocator(Integer.MAX_VALUE);
   }
 
-  @AfterClass
+  @AfterAll
   public static void tearDownClass() {
     allocator.close();
   }
 
-  @Test
-  public void testStruct() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testStruct(String name, IpcOption writeOption) throws Exception {
     try (final BufferAllocator originalVectorAllocator =
              allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
          final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
       writeData(COUNT, parent);
       roundTrip(
+          name,
+          writeOption,
          new VectorSchemaRoot(parent.getChild("root")),
          /* dictionaryProvider */null,
          TestRoundTrip::writeSingleBatch,
@@ -125,13 +117,16 @@ public void testStruct() throws Exception {
     }
   }
 
-  @Test
-  public void testComplex() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testComplex(String name, IpcOption writeOption) throws Exception {
     try (final BufferAllocator originalVectorAllocator =
             allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
         final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
       writeComplexData(COUNT, parent);
       roundTrip(
+          name,
+          writeOption,
          new VectorSchemaRoot(parent.getChild("root")),
          /* dictionaryProvider */null,
          TestRoundTrip::writeSingleBatch,
@@ -140,14 +135,17 @@ public void testComplex() throws Exception {
     }
   }
 
-  @Test
-  public void testMultipleRecordBatches() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testMultipleRecordBatches(String name, IpcOption writeOption) throws Exception {
     int[] counts = {10, 5};
     try (final BufferAllocator originalVectorAllocator =
             allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
         final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
       writeData(counts[0], parent);
       roundTrip(
+          name,
+          writeOption,
          new VectorSchemaRoot(parent.getChild("root")),
          /* dictionaryProvider */null,
          (root, writer) -> {
@@ -170,9 +168,10 @@ public void testMultipleRecordBatches() throws Exception {
     }
   }
 
-  @Test
-  public void testUnionV4() throws Exception {
-    Assume.assumeTrue(writeOption.metadataVersion == MetadataVersion.V4);
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testUnionV4(String name, IpcOption writeOption) throws Exception {
+    assumeTrue(writeOption.metadataVersion == MetadataVersion.V4);
     final File temp = File.createTempFile("arrow-test-" + name + "-", ".arrow");
     temp.deleteOnExit();
     final ByteArrayOutputStream memoryStream = new ByteArrayOutputStream();
@@ -188,17 +187,18 @@ public void testUnionV4() throws Exception {
           new ArrowStreamWriter(root, null, Channels.newChannel(memoryStream), writeOption);
         }
       });
-      assertTrue(e.getMessage(), e.getMessage().contains("Cannot write union with V4 metadata"));
+      assertTrue(e.getMessage().contains("Cannot write union with V4 metadata"), e.getMessage());
 
       e = assertThrows(IllegalArgumentException.class, () -> {
        new ArrowStreamWriter(root, null, Channels.newChannel(memoryStream), writeOption);
      });
-      assertTrue(e.getMessage(), e.getMessage().contains("Cannot write union with V4 metadata"));
+      assertTrue(e.getMessage().contains("Cannot write union with V4 metadata"), e.getMessage());
     }
   }
 
-  @Test
-  public void testUnionV5() throws Exception {
-    Assume.assumeTrue(writeOption.metadataVersion == MetadataVersion.V5);
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testUnionV5(String name, IpcOption writeOption) throws Exception {
+    assumeTrue(writeOption.metadataVersion == MetadataVersion.V5);
     try (final BufferAllocator originalVectorAllocator =
             allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
         final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
@@ -206,6 +206,8 @@ public void testUnionV5() throws Exception {
       VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
       validateUnionData(COUNT, root);
       roundTrip(
+          name,
+          writeOption,
          root,
          /* dictionaryProvider */null,
          TestRoundTrip::writeSingleBatch,
@@ -214,8 +216,9 @@
     }
   }
 
-  @Test
-  public void testTiny() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testTiny(String name, IpcOption writeOption) throws Exception {
     try (final VectorSchemaRoot root = VectorSchemaRoot.create(MessageSerializerTest.testSchema(), allocator)) {
       root.getFieldVectors().get(0).allocateNew();
       int count = 16;
@@ -227,6 +230,8 @@
       root.setRowCount(count);
 
       roundTrip(
+          name,
+          writeOption,
          root,
          /* dictionaryProvider */null,
          TestRoundTrip::writeSingleBatch,
@@ -247,8 +252,9 @@ private void validateTinyData(int count, VectorSchemaRoot root) {
     }
   }
 
-  @Test
-  public void testMetadata() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testMetadata(String name, IpcOption writeOption) throws Exception {
     List<Field> childFields = new ArrayList<>();
     childFields.add(new Field("varchar-child", new FieldType(true, ArrowType.Utf8.INSTANCE, null, metadata(1)), null));
     childFields.add(new Field("float-child",
@@ -283,6 +289,8 @@ public void testMetadata() throws Exception {
       }
     };
     roundTrip(
+        name,
+        writeOption,
        root,
        /* dictionaryProvider */null,
        TestRoundTrip::writeSingleBatch,
@@ -298,14 +306,17 @@ private Map<String, String> metadata(int i) {
     return Collections.unmodifiableMap(map);
   }
 
-  @Test
-  public void testFlatDictionary() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testFlatDictionary(String name, IpcOption writeOption) throws Exception {
     AtomicInteger numDictionaryBlocksWritten = new AtomicInteger();
     MapDictionaryProvider provider = new MapDictionaryProvider();
     try (final BufferAllocator originalVectorAllocator =
             allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
         final VectorSchemaRoot root = writeFlatDictionaryData(originalVectorAllocator, provider)) {
       roundTrip(
+          name,
+          writeOption,
          root,
          provider,
          (ignored, writer) -> {
@@ -339,8 +350,9 @@
     }
   }
 
-  @Test
-  public void testNestedDictionary() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testNestedDictionary(String name, IpcOption writeOption) throws Exception {
     AtomicInteger numDictionaryBlocksWritten = new AtomicInteger();
     MapDictionaryProvider provider = new MapDictionaryProvider();
     // data being written:
@@ -356,6 +368,8 @@ public void testNestedDictionary() throws Exception {
       validateNestedDictionary(readRoot, streamReader);
     };
     roundTrip(
+        name,
+        writeOption,
        root,
        provider,
        (ignored, writer) -> {
@@ -376,8 +390,9 @@
     }
   }
 
-  @Test
-  public void testFixedSizeBinary() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testFixedSizeBinary(String name, IpcOption writeOption) throws Exception {
     final int count = 10;
     final int typeWidth = 11;
     byte[][] byteValues = new byte[count][typeWidth];
@@ -405,6 +420,8 @@
       parent.setValueCount(count);
 
       roundTrip(
+          name,
+          writeOption,
          new VectorSchemaRoot(parent),
          /* dictionaryProvider */null,
          TestRoundTrip::writeSingleBatch,
@@ -413,8 +430,9 @@
     }
   }
 
-  @Test
-  public void testFixedSizeList() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testFixedSizeList(String name, IpcOption writeOption) throws Exception {
     BiConsumer<Integer, VectorSchemaRoot> validator = (expectedCount, root) -> {
       for (int i = 0; i < expectedCount; i++) {
         assertEquals(Collections2.asImmutableList(i + 0.1f, i + 10.1f), root.getVector("float-pairs")
@@ -441,6 +459,8 @@ public void testFixedSizeList() throws Exception {
       parent.setValueCount(COUNT);
 
      roundTrip(
+          name,
+          writeOption,
          new VectorSchemaRoot(parent),
          /* dictionaryProvider */null,
          TestRoundTrip::writeSingleBatch,
@@ -449,8 +469,9 @@
     }
   }
 
-  @Test
-  public void testVarBinary() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testVarBinary(String name, IpcOption writeOption) throws Exception {
     try (final BufferAllocator originalVectorAllocator =
             allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
         final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
@@ -459,6 +480,8 @@
       validateVarBinary(COUNT, root);
      roundTrip(
+          name,
+          writeOption,
          root,
          /* dictionaryProvider */null,
          TestRoundTrip::writeSingleBatch,
@@ -467,8 +490,9 @@
     }
   }
 
-  @Test
-  public void testReadWriteMultipleBatches() throws IOException {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testReadWriteMultipleBatches(String name, IpcOption writeOption) throws IOException {
     File file = new File("target/mytest_nulls_multibatch.arrow");
     int numBlocksWritten = 0;
@@ -491,12 +515,15 @@
     }
   }
 
-  @Test
-  public void testMap() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testMap(String name, IpcOption writeOption) throws Exception {
     try (final BufferAllocator originalVectorAllocator =
             allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
         final VectorSchemaRoot root = writeMapData(originalVectorAllocator)) {
      roundTrip(
+          name,
+          writeOption,
          root,
          /* dictionaryProvider */null,
          TestRoundTrip::writeSingleBatch,
@@ -505,12 +532,15 @@
     }
   }
 
-  @Test
-  public void testListAsMap() throws Exception {
+  @ParameterizedTest(name = "options = {0}")
+  @MethodSource("getWriteOption")
+  public void testListAsMap(String name, IpcOption writeOption) throws Exception {
     try (final BufferAllocator originalVectorAllocator =
             allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
         final VectorSchemaRoot root = writeListAsMapData(originalVectorAllocator)) {
      roundTrip(
+          name,
+          writeOption,
          root,
          /* dictionaryProvider */null,
          TestRoundTrip::writeSingleBatch,
@@ -539,10 +569,10 @@ private CheckedConsumer<ArrowFileReader> validateFileBatches(
     assertEquals(counts.length, recordBatches.size());
     long previousOffset = 0;
     for (ArrowBlock rbBlock : recordBatches) {
-      assertTrue(rbBlock.getOffset() + " > " + previousOffset, rbBlock.getOffset() > previousOffset);
+      assertTrue(rbBlock.getOffset() > previousOffset, rbBlock.getOffset() + " > " + previousOffset);
       previousOffset = rbBlock.getOffset();
       arrowReader.loadRecordBatch(rbBlock);
-      assertEquals("RB #" + i, counts[i], root.getRowCount());
+      assertEquals(counts[i], root.getRowCount(), "RB #" + i);
       validator.accept(counts[i], root);
       try (final ArrowRecordBatch batch = unloader.getRecordBatch()) {
         List<ArrowBuffer> buffersLayout = batch.getBuffersLayout();
@@ -566,7 +596,7 @@ private CheckedConsumer<ArrowStreamReader> validateStreamBatches(
     for (int n = 0; n < counts.length; n++) {
       assertTrue(arrowReader.loadNextBatch());
-      assertEquals("RB #" + i, counts[i], root.getRowCount());
+      assertEquals(counts[i], root.getRowCount(), "RB #" + i);
       validator.accept(counts[i], root);
       try (final ArrowRecordBatch batch = unloader.getRecordBatch()) {
         final List<ArrowBuffer> buffersLayout = batch.getBuffersLayout();
@@ -590,7 +620,7 @@ interface CheckedBiConsumer<T, U> {
     void accept(T t, U u) throws Exception;
   }
 
-  private void roundTrip(VectorSchemaRoot root, DictionaryProvider provider,
+  private void roundTrip(String name, IpcOption writeOption, VectorSchemaRoot root, DictionaryProvider provider,
                          CheckedBiConsumer<VectorSchemaRoot, ArrowWriter> writer,
                          CheckedConsumer<ArrowFileReader> fileValidator,
                          CheckedConsumer<ArrowStreamReader> streamValidator) throws Exception {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java
index ac95121eb73f2..db1e787d04d27 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java
@@ -18,10 +18,10 @@
 package org.apache.arrow.vector.ipc;
 
 import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -29,9 +29,9 @@
 import java.nio.channels.Channels;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Map;
 import java.util.function.ToIntBiFunction;
+import java.util.stream.Stream;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
@@ -51,41 +51,34 @@
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
 
 /**
  * Test the round-trip of dictionary encoding,
  * with unsigned integer as indices.
  */
-@RunWith(Parameterized.class)
 public class TestUIntDictionaryRoundTrip {
-  private final boolean streamMode;
-
-  public TestUIntDictionaryRoundTrip(boolean streamMode) {
-    this.streamMode = streamMode;
-  }
-
   private BufferAllocator allocator;
 
   private DictionaryProvider.MapDictionaryProvider dictionaryProvider;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Long.MAX_VALUE);
     dictionaryProvider = new DictionaryProvider.MapDictionaryProvider();
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
 
-  private byte[] writeData(FieldVector encodedVector) throws IOException {
+  private byte[] writeData(boolean streamMode, FieldVector encodedVector) throws IOException {
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     VectorSchemaRoot root = new VectorSchemaRoot(
@@ -102,6 +95,7 @@ private byte[] writeData(FieldVector encodedVector) throws IOException {
   }
 
   private void readData(
+      boolean streamMode,
       byte[] data,
       Field expectedField,
       ToIntBiFunction<ValueVector, Integer> valGetter,
@@ -156,8 +150,9 @@ private ValueVector createEncodedVector(int bitWidth, VarCharVector dictionaryVe
     return field.createVector(allocator);
   }
 
-  @Test
-  public void testUInt1RoundTrip() throws IOException {
+  @ParameterizedTest(name = "stream mode = {0}")
+  @MethodSource("getRepeat")
+  public void testUInt1RoundTrip(boolean streamMode) throws IOException {
     final int vectorLength = UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK;
     try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
         UInt1Vector encodedVector1 = (UInt1Vector) createEncodedVector(8, dictionaryVector)) {
@@ -170,15 +165,15 @@
       }
       encodedVector1.setValueCount(vectorLength);
       setVector(dictionaryVector, dictionaryItems);
-      byte[] data = writeData(encodedVector1);
-      readData(
-          data, encodedVector1.getField(), (vector, index) -> (int) ((UInt1Vector) vector).getValueAsLong(index),
-          8L, indices, dictionaryItems);
+      byte[] data = writeData(streamMode, encodedVector1);
+      readData(streamMode, data, encodedVector1.getField(),
+          (vector, index) -> (int) ((UInt1Vector) vector).getValueAsLong(index), 8L, indices, dictionaryItems);
     }
   }
 
-  @Test
-  public void testUInt2RoundTrip() throws IOException {
+  @ParameterizedTest(name = "stream mode = {0}")
+  @MethodSource("getRepeat")
+  public void testUInt2RoundTrip(boolean streamMode) throws IOException {
     try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
         UInt2Vector encodedVector2 = (UInt2Vector) createEncodedVector(16, dictionaryVector)) {
       int[] indices = new int[]{1, 3, 5, 7, 9, UInt2Vector.MAX_UINT2};
@@ -190,14 +185,15 @@
       setVector(encodedVector2, (char) 1, (char) 3, (char) 5, (char) 7, (char) 9, UInt2Vector.MAX_UINT2);
       setVector(dictionaryVector, dictItems);
 
-      byte[] data = writeData(encodedVector2);
-      readData(data, encodedVector2.getField(), (vector, index) -> (int) ((UInt2Vector) vector).getValueAsLong(index),
-          16L, indices, dictItems);
+      byte[] data = writeData(streamMode, encodedVector2);
+      readData(streamMode, data, encodedVector2.getField(),
+          (vector, index) -> (int) ((UInt2Vector) vector).getValueAsLong(index), 16L, indices, dictItems);
     }
   }
 
-  @Test
-  public void testUInt4RoundTrip() throws IOException {
+  @ParameterizedTest(name = "stream mode = {0}")
+  @MethodSource("getRepeat")
+  public void testUInt4RoundTrip(boolean streamMode) throws IOException {
     final int dictLength = 10;
     try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
         UInt4Vector encodedVector4 = (UInt4Vector) createEncodedVector(32, dictionaryVector)) {
@@ -211,14 +207,15 @@
       setVector(dictionaryVector, dictItems);
       setVector(encodedVector4, 1, 3, 5, 7, 9);
 
-      byte[] data = writeData(encodedVector4);
-      readData(data, encodedVector4.getField(), (vector, index) -> (int) ((UInt4Vector) vector).getValueAsLong(index),
-          32L, indices, dictItems);
+      byte[] data = writeData(streamMode, encodedVector4);
+      readData(streamMode, data, encodedVector4.getField(),
+          (vector, index) -> (int) ((UInt4Vector) vector).getValueAsLong(index), 32L, indices, dictItems);
     }
   }
 
-  @Test
-  public void testUInt8RoundTrip() throws IOException {
+  @ParameterizedTest(name = "stream mode = {0}")
+  @MethodSource("getRepeat")
+  public void testUInt8RoundTrip(boolean streamMode) throws IOException {
     final int dictLength = 10;
     try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
         UInt8Vector encodedVector8 = (UInt8Vector) createEncodedVector(64, dictionaryVector)) {
@@ -231,17 +228,16 @@
       setVector(encodedVector8, 1L, 3L, 5L, 7L, 9L);
       setVector(dictionaryVector, dictItems);
 
-      byte[] data = writeData(encodedVector8);
-      readData(data, encodedVector8.getField(), (vector, index) -> (int) ((UInt8Vector) vector).getValueAsLong(index),
-          64L, indices, dictItems);
+      byte[] data = writeData(streamMode, encodedVector8);
+      readData(streamMode, data, encodedVector8.getField(),
+          (vector, index) -> (int) ((UInt8Vector) vector).getValueAsLong(index), 64L, indices, dictItems);
     }
   }
 
-  @Parameterized.Parameters(name = "stream mode = {0}")
-  public static Collection<Object[]> getRepeat() {
-    return Arrays.asList(
-        new Object[]{true},
-        new Object[]{false}
+  static Stream<Arguments> getRepeat() {
+    return Stream.of(
+        Arguments.of(true),
+        Arguments.of(false)
     );
   }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java
index 0505a18484b54..89cbb9f3f1b89 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java
@@ -17,11 +17,11 @@
 
 package org.apache.arrow.vector.ipc.message;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.nio.ByteBuffer;
 
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 public class TestMessageMetadataResult {
 
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
index 5cc0d080053af..925f6ca254544 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
@@ -19,8 +19,8 @@
 
 import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
 import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
@@ -46,7 +46,7 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import com.google.flatbuffers.FlatBufferBuilder;
 
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java
index 3c075c9293079..369fcc140a1b1 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java
@@ -17,8 +17,8 @@
 
 package org.apache.arrow.vector.testing;
 
-import static junit.framework.TestCase.assertTrue;
 import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.nio.charset.StandardCharsets;
 
@@ -56,20 +56,20 @@
 import org.apache.arrow.vector.types.TimeUnit;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.FieldType;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestValueVectorPopulator {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Long.MAX_VALUE);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
index 9bfcb3c635d86..66dc13d6ef545 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
@@ -17,7 +17,7 @@
 
 package org.apache.arrow.vector.testing;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.math.BigDecimal;
 import java.nio.charset.StandardCharsets;
@@ -61,6 +61,7 @@
 import org.apache.arrow.vector.UInt8Vector;
 import org.apache.arrow.vector.VarBinaryVector;
 import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VariableWidthFieldVector;
 import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
 import org.apache.arrow.vector.complex.FixedSizeListVector;
 import org.apache.arrow.vector.complex.LargeListVector;
@@ -586,6 +587,17 @@ public static void setVector(VarCharVector vector, byte[]... values) {
     vector.setValueCount(length);
   }
 
+  public static void setVector(VariableWidthFieldVector vector, byte[]... values) {
+    final int length = values.length;
+    vector.allocateNewSafe();
+    for (int i = 0; i < length; i++) {
+      if (values[i] != null) {
+        vector.set(i, values[i]);
+      }
+    }
+    vector.setValueCount(length);
+  }
+
   /**
    * Populate values for LargeVarCharVector.
    */
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java
index 872b2f3934b07..5ebfb62038919 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java
@@ -17,10 +17,12 @@
 
 package org.apache.arrow.vector.types.pojo;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.File;
 import java.io.IOException;
@@ -51,8 +53,7 @@
 import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
 import org.apache.arrow.vector.util.VectorBatchAppender;
 import org.apache.arrow.vector.validate.ValidateVectorVisitor;
-import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 public class TestExtensionType {
   /**
@@ -85,21 +86,19 @@ public void roundtripUuid() throws IOException {
         final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
       reader.loadNextBatch();
       final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
-      Assert.assertEquals(root.getSchema(), readerRoot.getSchema());
+      assertEquals(root.getSchema(), readerRoot.getSchema());
 
       final Field field = readerRoot.getSchema().getFields().get(0);
       final UuidType expectedType = new UuidType();
-      Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME),
-          expectedType.extensionName());
-      Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA),
-          expectedType.serialize());
+      assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName());
+      assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize());
 
       final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0);
-      Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount());
+      assertEquals(vector.getValueCount(), deserialized.getValueCount());
       for (int i = 0; i < vector.getValueCount(); i++) {
-        Assert.assertEquals(vector.isNull(i), deserialized.isNull(i));
+        assertEquals(vector.isNull(i), deserialized.isNull(i));
         if (!vector.isNull(i)) {
-          Assert.assertEquals(vector.getObject(i), deserialized.getObject(i));
+          assertEquals(vector.getObject(i), deserialized.getObject(i));
         }
       }
     }
@@ -138,29 +137,27 @@ public void readUnderlyingType() throws IOException {
         final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
       reader.loadNextBatch();
       final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
-      Assert.assertEquals(1, readerRoot.getSchema().getFields().size());
-      Assert.assertEquals("a", readerRoot.getSchema().getFields().get(0).getName());
-      Assert.assertTrue(readerRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.FixedSizeBinary);
-      Assert.assertEquals(16,
+      assertEquals(1, readerRoot.getSchema().getFields().size());
+      assertEquals("a", readerRoot.getSchema().getFields().get(0).getName());
+      assertTrue(readerRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.FixedSizeBinary);
+      assertEquals(16,
           ((ArrowType.FixedSizeBinary) readerRoot.getSchema().getFields().get(0).getType()).getByteWidth());
 
       final Field field = readerRoot.getSchema().getFields().get(0);
       final UuidType expectedType = new UuidType();
-      Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME),
-          expectedType.extensionName());
-      Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA),
-          expectedType.serialize());
+      assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName());
+      assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize());
 
       final FixedSizeBinaryVector deserialized = (FixedSizeBinaryVector) readerRoot.getFieldVectors().get(0);
-      Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount());
+      assertEquals(vector.getValueCount(), deserialized.getValueCount());
       for (int i = 0; i < vector.getValueCount(); i++) {
-        Assert.assertEquals(vector.isNull(i), deserialized.isNull(i));
+        assertEquals(vector.isNull(i), deserialized.isNull(i));
         if (!vector.isNull(i)) {
           final UUID uuid = vector.getObject(i);
           final ByteBuffer bb = ByteBuffer.allocate(16);
           bb.putLong(uuid.getMostSignificantBits());
           bb.putLong(uuid.getLeastSignificantBits());
-          Assert.assertArrayEquals(bb.array(), deserialized.get(i));
+          assertArrayEquals(bb.array(), deserialized.get(i));
         }
       }
     }
@@ -210,26 +207,24 @@ public void roundtripLocation() throws IOException {
         final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
       reader.loadNextBatch();
       final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
-      Assert.assertEquals(root.getSchema(), readerRoot.getSchema());
+      assertEquals(root.getSchema(), readerRoot.getSchema());
 
       final Field field = readerRoot.getSchema().getFields().get(0);
       final LocationType expectedType = new LocationType();
-      Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME),
-          expectedType.extensionName());
-      Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA),
-          expectedType.serialize());
+      assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName());
+      assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize());
 
       final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0);
-      Assert.assertTrue(deserialized instanceof LocationVector);
-      Assert.assertEquals("location", deserialized.getName());
+      assertTrue(deserialized instanceof LocationVector);
+      assertEquals("location", deserialized.getName());
       StructVector deserStruct = (StructVector) deserialized.getUnderlyingVector();
-      Assert.assertNotNull(deserStruct.getChild("Latitude"));
-      Assert.assertNotNull(deserStruct.getChild("Longitude"));
-      Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount());
+      assertNotNull(deserStruct.getChild("Latitude"));
+      assertNotNull(deserStruct.getChild("Longitude"));
+      assertEquals(vector.getValueCount(), deserialized.getValueCount());
       for (int i = 0; i < vector.getValueCount(); i++) {
-        Assert.assertEquals(vector.isNull(i), deserialized.isNull(i));
+        assertEquals(vector.isNull(i), deserialized.isNull(i));
         if (!vector.isNull(i)) {
-          Assert.assertEquals(vector.getObject(i), deserialized.getObject(i));
+          assertEquals(vector.getObject(i), deserialized.getObject(i));
         }
       }
     }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java
index bc984fa642d52..8f98a9e9f8b53 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java
@@ -19,8 +19,8 @@
 
 import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY;
 import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -28,7 +28,7 @@
 import java.util.Map;
 
 import org.apache.arrow.vector.types.pojo.ArrowType.Int;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 public class TestField {
 
@@ -57,7 +57,7 @@ public void testMetadata() throws IOException {
 
   private void jsonContains(String json, String... strings) {
     for (String string : strings) {
-      assertTrue(json + " contains " + string, json.contains(string));
+      assertTrue(json.contains(string), json + " contains " + string);
     }
   }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
index 7b62247c6e12d..e51e76737dfb7 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
@@ -20,8 +20,8 @@
 import static java.util.Arrays.asList;
 import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY;
 import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
@@ -49,7 +49,7 @@
 import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
 import org.apache.arrow.vector.types.pojo.ArrowType.Union;
 import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 public class TestSchema {
 
@@ -280,13 +280,13 @@ private void validateFieldsHashcode(java.util.List<Field> schemaFields, java.uti
 
   private void validateHashCode(Object o1, Object o2) {
     assertEquals(o1, o2);
-    assertEquals(o1 + " == " + o2, o1.hashCode(), o2.hashCode());
+    assertEquals(o1.hashCode(), o2.hashCode(), o1 + " == " + o2);
   }
 
   private void contains(Schema schema, String... s) {
     String json = schema.toJson();
     for (String string : s) {
-      assertTrue(json + " contains " + string, json.contains(string));
+      assertTrue(json.contains(string), json + " contains " + string);
     }
   }
 
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java b/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java
index 804092ed94ac7..21906cb89af24 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java
@@ -17,14 +17,15 @@
 
 package org.apache.arrow.vector.util;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 import java.math.BigDecimal;
 import java.math.BigInteger;
 
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
-import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 public class DecimalUtilityTest {
   private static final BigInteger[] MAX_BIG_INT = new BigInteger[]{BigInteger.valueOf(10).pow(38)
@@ -45,7 +46,7 @@ public void testSetLongInDecimalArrowBuf() {
         DecimalUtility.writeLongToArrowBuf((long) val, buf, 0, byteLengths[x]);
         BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
         BigDecimal expected = BigDecimal.valueOf(val);
-        Assert.assertEquals(expected, actual);
+        assertEquals(expected, actual);
       }
     }
   }
@@ -64,7 +65,7 @@ public void testSetByteArrayInDecimalArrowBuf() {
         DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]);
         BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
         BigDecimal expected = BigDecimal.valueOf(val);
-        Assert.assertEquals(expected, actual);
+        assertEquals(expected, actual);
       }
 
       long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE};
@@ -73,7 +74,7 @@
         DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]);
         BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
         BigDecimal expected = BigDecimal.valueOf(val);
-        Assert.assertEquals(expected, actual);
+        assertEquals(expected, actual);
       }
 
       BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]};
@@ -82,7 +83,7 @@
         DecimalUtility.writeByteArrayToArrowBuf(val.toByteArray(), buf, 0, byteLengths[x]);
         BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
         BigDecimal expected = new BigDecimal(val);
-        Assert.assertEquals(expected, actual);
+        assertEquals(expected, actual);
       }
     }
   }
@@ -101,7 +102,7 @@ public void testSetBigDecimalInDecimalArrowBuf() {
         DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]);
         BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
         BigDecimal expected = BigDecimal.valueOf(val);
-        Assert.assertEquals(expected, actual);
+        assertEquals(expected, actual);
       }
 
       long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE};
@@ -110,7 +111,7 @@
         DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]);
         BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
         BigDecimal expected = BigDecimal.valueOf(val);
-        Assert.assertEquals(expected, actual);
+        assertEquals(expected, actual);
       }
 
       BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]};
@@ -119,7 +120,7 @@
         DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(val), buf, 0, byteLengths[x]);
         BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
         BigDecimal expected = new BigDecimal(val);
-        Assert.assertEquals(expected, actual);
+        assertEquals(expected, actual);
       }
     }
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java
index 4138ea9d7a181..636de9aab1f2b 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java
@@ -17,9 +17,9 @@
 
 package org.apache.arrow.vector.util;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 /**
  * Test cases for {@link DataSizeRoundingUtil}.
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java
index 1c8281c85981b..fb954413e9f29 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java
@@ -17,8 +17,8 @@
 
 package org.apache.arrow.vector.util;
 
-import static junit.framework.TestCase.assertNull;
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
 
 import java.nio.charset.StandardCharsets;
 
@@ -27,9 +27,9 @@
 import org.apache.arrow.memory.util.ArrowBufPointer;
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.VarCharVector;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 /**
  * Test cases for {@link ElementAddressableVectorIterator}.
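Note for reviewers: the hunks above and below all apply the same mechanical rename — JUnit 4's @Before/@After become JUnit 5's @BeforeEach/@AfterEach, and the class-scoped @BeforeClass/@AfterClass become @BeforeAll/@AfterAll — with imports moving from org.junit.* to org.junit.jupiter.api.*. A minimal sketch of the resulting shape; the class name, test body, and allocator sizing here are illustrative, not taken from this patch:

    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.junit.jupiter.api.AfterEach;
    import org.junit.jupiter.api.BeforeEach;
    import org.junit.jupiter.api.Test;

    public class ExampleAllocatorTest {
      private BufferAllocator allocator;

      @BeforeEach
      public void prepare() {
        // JUnit 4 equivalent: @Before. Runs before every @Test method.
        allocator = new RootAllocator(1024 * 1024);
      }

      @AfterEach
      public void shutdown() {
        // JUnit 4 equivalent: @After. Runs after every @Test method;
        // closing the root allocator throws if any child buffer leaked.
        allocator.close();
      }

      @Test
      public void testLifecycle() {
        // Body is irrelevant here; prepare() and shutdown() bracket it.
      }
    }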
@@ -40,12 +40,12 @@ public class TestElementAddressableVectorIterator {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void prepare() {
     allocator = new RootAllocator(1024 * 1024);
   }
 
-  @After
+  @AfterEach
   public void shutdown() {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java
index edd5221faf268..e0c9031c49b94 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java
@@ -18,20 +18,20 @@
 package org.apache.arrow.vector.util;
 
 import static junit.framework.TestCase.assertNull;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.util.Collection;
 
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestMapWithOrdinal {
 
   private MapWithOrdinal<String, String> map;
 
-  @Before
+  @BeforeEach
   public void setUp() {
     map = new MapWithOrdinalImpl<>();
   }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java
index ea829060d1c04..0c03f3dfeac46 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java
@@ -17,8 +17,11 @@
 
 package org.apache.arrow.vector.util;
 
-import org.junit.Assert;
-import org.junit.Test;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Test;
 
 public class TestMultiMapWithOrdinal {
 
@@ -27,33 +30,33 @@ public void test() {
     MultiMapWithOrdinal<String, String> map = new MultiMapWithOrdinal<>();
 
     map.put("x", "1", false);
-    Assert.assertEquals(1, map.size());
+    assertEquals(1, map.size());
     map.remove("x", "1");
-    Assert.assertTrue(map.isEmpty());
+    assertTrue(map.isEmpty());
     map.put("x", "1", false);
     map.put("x", "2", false);
     map.put("y", "0", false);
-    Assert.assertEquals(3, map.size());
-    Assert.assertEquals(2, map.getAll("x").size());
-    Assert.assertEquals("1", map.getAll("x").stream().findFirst().get());
-    Assert.assertEquals("1", map.getByOrdinal(0));
-    Assert.assertEquals("2", map.getByOrdinal(1));
-    Assert.assertEquals("0", map.getByOrdinal(2));
-    Assert.assertTrue(map.remove("x", "1"));
-    Assert.assertFalse(map.remove("x", "1"));
-    Assert.assertEquals("0", map.getByOrdinal(0));
-    Assert.assertEquals(2, map.size());
+    assertEquals(3, map.size());
+    assertEquals(2, map.getAll("x").size());
+    assertEquals("1", map.getAll("x").stream().findFirst().get());
+    assertEquals("1", map.getByOrdinal(0));
+    assertEquals("2", map.getByOrdinal(1));
+    assertEquals("0", map.getByOrdinal(2));
+    assertTrue(map.remove("x", "1"));
+    assertFalse(map.remove("x", "1"));
+    assertEquals("0", map.getByOrdinal(0));
+    assertEquals(2, map.size());
     map.put("x", "3", true);
-    Assert.assertEquals(1, map.getAll("x").size());
-    Assert.assertEquals("3", map.getAll("x").stream().findFirst().get());
+    assertEquals(1, map.getAll("x").size());
+    assertEquals("3", map.getAll("x").stream().findFirst().get());
     map.put("z", "4", false);
-    Assert.assertEquals(3, map.size());
+    assertEquals(3, map.size());
     map.put("z", "5", false);
     map.put("z", "6", false);
-    Assert.assertEquals(5, map.size());
+    assertEquals(5, map.size());
     map.removeAll("z");
-    Assert.assertEquals(2, map.size());
-    Assert.assertFalse(map.containsKey("z"));
+    assertEquals(2, map.size());
+    assertFalse(map.containsKey("z"));
   }
 
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java
index f562e63b4bf8d..80420608c3912 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java
@@ -17,11 +17,11 @@
 
 package org.apache.arrow.vector.util;
 
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
@@ -31,21 +31,21 @@
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.BaseValueVector;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestReusableByteArray {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void prepare() {
     // Permit allocating 4 vectors of max size.
     allocator = new RootAllocator(4 * BaseValueVector.MAX_ALLOCATION_SIZE);
   }
 
-  @After
+  @AfterEach
   public void shutdown() {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java
index 52b6584086832..4375ca6e690b7 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java
@@ -18,7 +18,7 @@
 package org.apache.arrow.vector.util;
 
 import static java.util.Arrays.asList;
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.io.IOException;
 
@@ -28,7 +28,7 @@
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.apache.arrow.vector.util.SchemaUtility;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 public class TestSchemaUtil {
 
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java
index 2db70ca5d5b8d..0f72ada76f933 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java
@@ -18,10 +18,10 @@
 package org.apache.arrow.vector.util;
 
 import static org.apache.arrow.vector.util.Validator.equalEnough;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 public class TestValidator {
 
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
index 93e7535947536..45563a69ba9e6 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
@@ -17,9 +17,9 @@
 
 package org.apache.arrow.vector.util;
 
-import static junit.framework.TestCase.assertEquals;
-import static junit.framework.TestCase.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
@@ -53,9 +53,9 @@
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 /**
  * Test cases for {@link VectorAppender}.
@@ -64,13 +64,13 @@ public class TestVectorAppender {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void prepare() {
     // Permit allocating 4 vectors of max size.
     allocator = new RootAllocator(4 * BaseValueVector.MAX_ALLOCATION_SIZE);
   }
 
-  @After
+  @AfterEach
   public void shutdown() {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java
index 799c25c0ad71c..193736e70cadf 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java
@@ -17,15 +17,15 @@
 
 package org.apache.arrow.vector.util;
 
-import static junit.framework.TestCase.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 /**
  * Test cases for {@link VectorBatchAppender}.
@@ -34,12 +34,12 @@ public class TestVectorBatchAppender {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void prepare() {
     allocator = new RootAllocator(1024 * 1024);
   }
 
-  @After
+  @AfterEach
   public void shutdown() {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java
index 6309d385870c9..82a4589c3ba64 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java
@@ -17,8 +17,8 @@
 
 package org.apache.arrow.vector.util;
 
-import static junit.framework.TestCase.assertEquals;
 import static org.apache.arrow.vector.util.TestVectorAppender.assertVectorsEqual;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
 import org.apache.arrow.memory.BufferAllocator;
@@ -28,9 +28,9 @@
 import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 /**
  * Test cases for {@link VectorSchemaRootAppender}.
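Note for reviewers: where a hunk rewrites an assertion rather than just its import, it is because the two APIs put the failure message on opposite ends — org.junit.Assert takes the message as the first parameter, org.junit.jupiter.api.Assertions takes it as the last. A small sketch of the flip; the values and class name are illustrative, not from this patch:

    import static org.junit.jupiter.api.Assertions.assertEquals;
    import static org.junit.jupiter.api.Assertions.assertTrue;

    public class MessageOrderExample {
      public static void main(String[] args) {
        int rowCount = 16;
        // JUnit 4: Assert.assertEquals("RB #0", 16, rowCount);
        // JUnit 5: the message moves to the final argument position.
        assertEquals(16, rowCount, "RB #0");
        // Same flip for assertTrue: condition first, message last.
        assertTrue(rowCount > 0, rowCount + " > 0");
      }
    }

The flip matters most when all three arguments are strings: a message-first JUnit 4 call can still compile against JUnit 5's message-last overload and silently compare the wrong pair, which is why these call sites are rewritten rather than left to the import swap.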
@@ -39,12 +39,12 @@ public class TestVectorSchemaRootAppender {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void prepare() {
     allocator = new RootAllocator(1024 * 1024);
   }
 
-  @After
+  @AfterEach
   public void shutdown() {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java
index 20492036dab99..837b865c30b26 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java
@@ -19,8 +19,8 @@
 
 import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
 import static org.apache.arrow.vector.util.ValueVectorUtility.validate;
-import static org.junit.Assert.assertTrue;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.nio.charset.Charset;
 import java.util.Arrays;
@@ -44,15 +44,15 @@
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestValidateVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Long.MAX_VALUE);
   }
@@ -62,7 +62,7 @@ public void init() {
   private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
   private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset);
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java
index ca71a622bb8ea..fcf031fc33824 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java
@@ -19,9 +19,9 @@
 
 import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
 import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull;
-import static org.junit.Assert.assertTrue;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
@@ -48,20 +48,20 @@
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestValidateVectorFull {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Long.MAX_VALUE);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java
index 1885fb21f17b6..bdb9ad3e8e530 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java
@@ -19,8 +19,8 @@
 
 import static org.apache.arrow.vector.util.ValueVectorUtility.validate;
 import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull;
-import static org.junit.Assert.assertTrue;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
@@ -29,20 +29,20 @@
 import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestValidateVectorSchemaRoot {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Long.MAX_VALUE);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java
index 0ddd790d6ffab..42297e1d37fe0 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java
@@ -70,9 +70,9 @@
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.FieldType;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 /**
  * Test cases for {@link ValidateVectorTypeVisitor}.
@@ -83,12 +83,12 @@ public class TestValidateVectorTypeVisitor {
 
   private ValidateVectorTypeVisitor visitor = new ValidateVectorTypeVisitor();
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Long.MAX_VALUE);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
diff --git a/js/bin/integration.ts b/js/bin/integration.ts
index f73388cc85cf0..d41ce08aa16b6 100755
--- a/js/bin/integration.ts
+++ b/js/bin/integration.ts
@@ -20,7 +20,7 @@
 import * as fs from 'node:fs';
 import * as Path from 'node:path';
 import { glob } from 'glob';
-import { zip } from 'ix/iterable/zip.js';
+import { zip } from 'ix/iterable/zip';
 import commandLineArgs from 'command-line-args';
 // @ts-ignore
 import { parse as bignumJSONParse } from 'json-bignum';
diff --git a/js/package.json b/js/package.json
index fee6b342dbd13..ecb6d3a366f7e 100644
--- a/js/package.json
+++ b/js/package.json
@@ -52,10 +52,10 @@
     "jest.config.js"
   ],
   "dependencies": {
-    "@swc/helpers": "^0.5.10",
+    "@swc/helpers": "^0.5.11",
     "@types/command-line-args": "^5.2.3",
     "@types/command-line-usage": "^5.0.4",
-    "@types/node": "^20.12.7",
+    "@types/node": "^20.13.0",
     "command-line-args": "^5.2.1",
     "command-line-usage": "^7.0.1",
     "flatbuffers": "^24.3.25",
@@ -67,26 +67,26 @@
     "@rollup/plugin-alias": "5.1.0",
     "@rollup/plugin-node-resolve": "15.2.3",
     "@rollup/stream": "3.0.1",
-    "@swc/core": "1.4.14",
+    "@swc/core": "1.5.24",
     "@types/benchmark": "2.1.5",
     "@types/glob": "8.1.0",
     "@types/jest": "29.5.12",
     "@types/multistream": "4.1.3",
-    "@typescript-eslint/eslint-plugin": "7.7.0",
-    "@typescript-eslint/parser": "7.7.0",
+    "@typescript-eslint/eslint-plugin": "7.12.0",
+    "@typescript-eslint/parser": "7.12.0",
     "async-done": "2.0.0",
     "benny": "3.7.1",
     "cross-env": "7.0.3",
     "del": "7.1.0",
     "del-cli": "5.1.0",
-    "esbuild": "0.20.2",
+    "esbuild": "0.21.4",
     "esbuild-plugin-alias": "0.2.1",
     "eslint": "8.57.0",
-    "eslint-plugin-jest": "27.9.0",
-    "eslint-plugin-unicorn": "52.0.0",
+    "eslint-plugin-jest": "28.5.0",
+    "eslint-plugin-unicorn": "53.0.0",
     "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz",
     "gulp": "4.0.2",
-    "glob": "10.3.12",
+    "glob": "10.4.1",
     "google-closure-compiler": "20240317.0.0",
     "gulp-esbuild": "0.12.0",
     "gulp-json-transform": "0.5.0",
@@ -96,16 +96,16 @@
     "gulp-terser": "2.1.0",
     "gulp-typescript": "5.0.1",
     "gulp-vinyl-size": "1.1.4",
-    "ix": "5.0.0",
+    "ix": "6.0.0",
     "jest": "29.7.0",
-    "jest-silent-reporter": "0.5.0",
-    "memfs": "4.8.2",
+    "jest-silent-reporter": "0.6.0",
+    "memfs": "4.9.2",
     "mkdirp": "3.0.1",
     "multistream": "4.1.0",
     "regenerator-runtime": "0.14.1",
-    "rollup": "4.14.3",
+    "rollup": "4.18.0",
     "rxjs": "7.8.1",
-    "ts-jest": "29.1.2",
+    "ts-jest": "29.1.4",
     "ts-node": "10.9.2",
     "typedoc": "0.25.13",
     "typescript": "5.4.5",
diff --git a/js/test/unit/builders/builder-tests.ts b/js/test/unit/builders/builder-tests.ts
index 4d1be9b225b08..c9174023f6dae 100644
--- a/js/test/unit/builders/builder-tests.ts
+++ b/js/test/unit/builders/builder-tests.ts
@@ -18,7 +18,7 @@
 import 'web-streams-polyfill';
 
 import '../../jest-extensions.js';
-import { from, fromDOMStream, toArray } from 'ix/asynciterable';
+import { from, fromDOMStream, toArray } from 'ix/Ix.asynciterable';
 import { fromNodeStream } from 'ix/asynciterable/fromnodestream';
 import { validateVector } from './utils.js';
diff --git a/js/test/unit/builders/utils.ts b/js/test/unit/builders/utils.ts
index 1d0707a6ca5d9..7cc0a075d84d4 100644
--- a/js/test/unit/builders/utils.ts
+++ b/js/test/unit/builders/utils.ts
@@ -17,7 +17,7 @@
 
 import 'web-streams-polyfill';
 
-import { from, fromDOMStream, toArray } from 'ix/asynciterable';
+import { from, fromDOMStream, toArray } from 'ix/Ix.asynciterable';
 import { fromNodeStream } from 'ix/asynciterable/fromnodestream';
 import 'ix/Ix.node';
diff --git a/js/test/unit/ipc/writer/streams-dom-tests.ts b/js/test/unit/ipc/writer/streams-dom-tests.ts
index dc792c9cf82be..2040e89a48802 100644
--- a/js/test/unit/ipc/writer/streams-dom-tests.ts
+++ b/js/test/unit/ipc/writer/streams-dom-tests.ts
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { as, from } from 'ix/asynciterable';
-import { flatMap, tap } from 'ix/asynciterable/operators';
+import { as, from } from 'ix/Ix.asynciterable';
+import { flatMap, tap } from 'ix/Ix.asynciterable.operators';
 
 import { generateRandomTables } from '../../../data/tables.js';
 import {
diff --git a/js/test/unit/ipc/writer/streams-node-tests.ts b/js/test/unit/ipc/writer/streams-node-tests.ts
index 1f4c9c7a02cfb..afcb6deb1e053 100644
--- a/js/test/unit/ipc/writer/streams-node-tests.ts
+++ b/js/test/unit/ipc/writer/streams-node-tests.ts
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { as, from } from 'ix/asynciterable';
-import { flatMap, tap } from 'ix/asynciterable/operators';
+import { as, from } from 'ix/Ix.asynciterable';
+import { flatMap, tap } from 'ix/Ix.asynciterable.operators';
 import 'ix/Ix.node';
 
 import { generateRandomTables } from '../../../data/tables.js';
diff --git a/js/test/unit/table/assign-tests.ts b/js/test/unit/table/assign-tests.ts
index f2a5ff4f37cac..8e1cdfec41cbf 100644
--- a/js/test/unit/table/assign-tests.ts
+++ b/js/test/unit/table/assign-tests.ts
@@ -17,7 +17,7 @@
 
 /* eslint-disable jest/no-standalone-expect */
 
-import { zip } from 'ix/iterable';
+import { zip } from 'ix/Ix.iterable';
 import '../../jest-extensions.js';
 import * as generate from '../../generate-test-data.js';
diff --git a/js/yarn.lock b/js/yarn.lock
index b74e4543d9d4e..5ab52beaf8f15 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -174,10 +174,10 @@
   resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.24.1.tgz#f99c36d3593db9540705d0739a1f10b5e20c696e"
   integrity sha512-2ofRCjnnA9y+wk8b9IAREroeUP02KHp431N2mhKniy2yKIDKpbrHv9eXwm8cBeWQYcJmzv5qKCu65P47eCF7CQ==
 
-"@babel/helper-validator-identifier@^7.22.20":
-  version "7.22.20"
-  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz#c4ae002c61d2879e724581d96665583dbc1dc0e0"
-  integrity sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==
+"@babel/helper-validator-identifier@^7.22.20", "@babel/helper-validator-identifier@^7.24.5":
+  version "7.24.6"
+  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.24.6.tgz#08bb6612b11bdec78f3feed3db196da682454a5e"
+  integrity sha512-4yA7s865JHaqUdRbnaxarZREuPTHrjpDT+pXoAZ1yhyo6uFnIEpS8VMu16siFOHDpZNKYv5BObhsB//ycbICyw==
 
 "@babel/helper-validator-option@^7.23.5":
   version "7.23.5"
@@ -362,230 +362,230 @@
   resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz#d1bc06aedb6936b3b6d313bf809a5a40387d2b7f"
   integrity sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA==
 
-"@esbuild/aix-ppc64@0.20.2":
-  version "0.20.2"
-  resolved
"https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.20.2.tgz#a70f4ac11c6a1dfc18b8bbb13284155d933b9537" - integrity sha512-D+EBOJHXdNZcLJRBkhENNG8Wji2kgc9AZ9KiPr1JuZjsNtyHzrsfLRrY0tk2H2aoFu6RANO1y1iPPUCDYWkb5g== +"@esbuild/aix-ppc64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.4.tgz#f83eb142df3ca7b49531c1ed680b81e484316508" + integrity sha512-Zrm+B33R4LWPLjDEVnEqt2+SLTATlru1q/xYKVn8oVTbiRBGmK2VIMoIYGJDGyftnGaC788IuzGFAlb7IQ0Y8A== "@esbuild/android-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz#7ad65a36cfdb7e0d429c353e00f680d737c2aed4" integrity sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA== -"@esbuild/android-arm64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.20.2.tgz#db1c9202a5bc92ea04c7b6840f1bbe09ebf9e6b9" - integrity sha512-mRzjLacRtl/tWU0SvD8lUEwb61yP9cqQo6noDZP/O8VkwafSYwZ4yWy24kan8jE/IMERpYncRt2dw438LP3Xmg== +"@esbuild/android-arm64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.21.4.tgz#dd328039daccd6033b2d1e536c054914bfc92287" + integrity sha512-fYFnz+ObClJ3dNiITySBUx+oNalYUT18/AryMxfovLkYWbutXsct3Wz2ZWAcGGppp+RVVX5FiXeLYGi97umisA== "@esbuild/android-arm@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.19.12.tgz#b0c26536f37776162ca8bde25e42040c203f2824" integrity sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w== -"@esbuild/android-arm@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.20.2.tgz#3b488c49aee9d491c2c8f98a909b785870d6e995" - integrity sha512-t98Ra6pw2VaDhqNWO2Oph2LXbz/EJcnLmKLGBJwEwXX/JAN83Fym1rU8l0JUWK6HkIbWONCSSatf4sf2NBRx/w== +"@esbuild/android-arm@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.21.4.tgz#76767a989720a97b206ea14c52af6e4589e48b0d" + integrity sha512-E7H/yTd8kGQfY4z9t3nRPk/hrhaCajfA3YSQSBrst8B+3uTcgsi8N+ZWYCaeIDsiVs6m65JPCaQN/DxBRclF3A== "@esbuild/android-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.19.12.tgz#cb13e2211282012194d89bf3bfe7721273473b3d" integrity sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew== -"@esbuild/android-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.20.2.tgz#3b1628029e5576249d2b2d766696e50768449f98" - integrity sha512-btzExgV+/lMGDDa194CcUQm53ncxzeBrWJcncOBxuC6ndBkKxnHdFJn86mCIgTELsooUmwUm9FkhSp5HYu00Rg== +"@esbuild/android-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.21.4.tgz#14a8ae3c35702d882086efb5a8f8d7b0038d8d35" + integrity sha512-mDqmlge3hFbEPbCWxp4fM6hqq7aZfLEHZAKGP9viq9wMUBVQx202aDIfc3l+d2cKhUJM741VrCXEzRFhPDKH3Q== "@esbuild/darwin-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz#cbee41e988020d4b516e9d9e44dd29200996275e" integrity sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g== -"@esbuild/darwin-arm64@0.20.2": - version "0.20.2" - resolved 
"https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.20.2.tgz#6e8517a045ddd86ae30c6608c8475ebc0c4000bb" - integrity sha512-4J6IRT+10J3aJH3l1yzEg9y3wkTDgDk7TSDFX+wKFiWjqWp/iCfLIYzGyasx9l0SAFPT1HwSCR+0w/h1ES/MjA== +"@esbuild/darwin-arm64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.21.4.tgz#7e735046005e4c12e9139e0bdd1fa6a754430d57" + integrity sha512-72eaIrDZDSiWqpmCzVaBD58c8ea8cw/U0fq/PPOTqE3c53D0xVMRt2ooIABZ6/wj99Y+h4ksT/+I+srCDLU9TA== "@esbuild/darwin-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz#e37d9633246d52aecf491ee916ece709f9d5f4cd" integrity sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A== -"@esbuild/darwin-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.20.2.tgz#90ed098e1f9dd8a9381695b207e1cff45540a0d0" - integrity sha512-tBcXp9KNphnNH0dfhv8KYkZhjc+H3XBkF5DKtswJblV7KlT9EI2+jeA8DgBjp908WEuYll6pF+UStUCfEpdysA== +"@esbuild/darwin-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.21.4.tgz#db623553547a5fe3502a63aa88306e9023178482" + integrity sha512-uBsuwRMehGmw1JC7Vecu/upOjTsMhgahmDkWhGLWxIgUn2x/Y4tIwUZngsmVb6XyPSTXJYS4YiASKPcm9Zitag== "@esbuild/freebsd-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz#1ee4d8b682ed363b08af74d1ea2b2b4dbba76487" integrity sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA== -"@esbuild/freebsd-arm64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.20.2.tgz#d71502d1ee89a1130327e890364666c760a2a911" - integrity sha512-d3qI41G4SuLiCGCFGUrKsSeTXyWG6yem1KcGZVS+3FYlYhtNoNgYrWcvkOoaqMhwXSMrZRl69ArHsGJ9mYdbbw== +"@esbuild/freebsd-arm64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.4.tgz#91cbad647c079bf932086fbd4749d7f563df67b8" + integrity sha512-8JfuSC6YMSAEIZIWNL3GtdUT5NhUA/CMUCpZdDRolUXNAXEE/Vbpe6qlGLpfThtY5NwXq8Hi4nJy4YfPh+TwAg== "@esbuild/freebsd-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz#37a693553d42ff77cd7126764b535fb6cc28a11c" integrity sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg== -"@esbuild/freebsd-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.20.2.tgz#aa5ea58d9c1dd9af688b8b6f63ef0d3d60cea53c" - integrity sha512-d+DipyvHRuqEeM5zDivKV1KuXn9WeRX6vqSqIDgwIfPQtwMP4jaDsQsDncjTDDsExT4lR/91OLjRo8bmC1e+Cw== +"@esbuild/freebsd-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.21.4.tgz#723299b9859ccbe5532fecbadba3ac33019ba8e8" + integrity sha512-8d9y9eQhxv4ef7JmXny7591P/PYsDFc4+STaxC1GBv0tMyCdyWfXu2jBuqRsyhY8uL2HU8uPyscgE2KxCY9imQ== "@esbuild/linux-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz#be9b145985ec6c57470e0e051d887b09dddb2d4b" integrity sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA== -"@esbuild/linux-arm64@0.20.2": - version "0.20.2" - resolved 
"https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.20.2.tgz#055b63725df678379b0f6db9d0fa85463755b2e5" - integrity sha512-9pb6rBjGvTFNira2FLIWqDk/uaf42sSyLE8j1rnUpuzsODBq7FvpwHYZxQ/It/8b+QOS1RYfqgGFNLRI+qlq2A== +"@esbuild/linux-arm64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.21.4.tgz#531743f861e1ef6e50b874d6c784cda37aa5e685" + integrity sha512-/GLD2orjNU50v9PcxNpYZi+y8dJ7e7/LhQukN3S4jNDXCKkyyiyAz9zDw3siZ7Eh1tRcnCHAo/WcqKMzmi4eMQ== "@esbuild/linux-arm@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz#207ecd982a8db95f7b5279207d0ff2331acf5eef" integrity sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w== -"@esbuild/linux-arm@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.20.2.tgz#76b3b98cb1f87936fbc37f073efabad49dcd889c" - integrity sha512-VhLPeR8HTMPccbuWWcEUD1Az68TqaTYyj6nfE4QByZIQEQVWBB8vup8PpR7y1QHL3CpcF6xd5WVBU/+SBEvGTg== +"@esbuild/linux-arm@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.21.4.tgz#1144b5654764960dd97d90ddf0893a9afc63ad91" + integrity sha512-2rqFFefpYmpMs+FWjkzSgXg5vViocqpq5a1PSRgT0AvSgxoXmGF17qfGAzKedg6wAwyM7UltrKVo9kxaJLMF/g== "@esbuild/linux-ia32@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz#d0d86b5ca1562523dc284a6723293a52d5860601" integrity sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA== -"@esbuild/linux-ia32@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.20.2.tgz#c0e5e787c285264e5dfc7a79f04b8b4eefdad7fa" - integrity sha512-o10utieEkNPFDZFQm9CoP7Tvb33UutoJqg3qKf1PWVeeJhJw0Q347PxMvBgVVFgouYLGIhFYG0UGdBumROyiig== +"@esbuild/linux-ia32@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.21.4.tgz#c81b6f2ed3308d3b75ccefb5ac63bc4cf3a9d2e9" + integrity sha512-pNftBl7m/tFG3t2m/tSjuYeWIffzwAZT9m08+9DPLizxVOsUl8DdFzn9HvJrTQwe3wvJnwTdl92AonY36w/25g== "@esbuild/linux-loong64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz#9a37f87fec4b8408e682b528391fa22afd952299" integrity sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA== -"@esbuild/linux-loong64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.20.2.tgz#a6184e62bd7cdc63e0c0448b83801001653219c5" - integrity sha512-PR7sp6R/UC4CFVomVINKJ80pMFlfDfMQMYynX7t1tNTeivQ6XdX5r2XovMmha/VjR1YN/HgHWsVcTRIMkymrgQ== +"@esbuild/linux-loong64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.21.4.tgz#87b6af7cd0f2551653955fc2dc465b7f4464af0a" + integrity sha512-cSD2gzCK5LuVX+hszzXQzlWya6c7hilO71L9h4KHwqI4qeqZ57bAtkgcC2YioXjsbfAv4lPn3qe3b00Zt+jIfQ== "@esbuild/linux-mips64el@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz#4ddebd4e6eeba20b509d8e74c8e30d8ace0b89ec" integrity sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w== -"@esbuild/linux-mips64el@0.20.2": - version "0.20.2" - resolved 
"https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.20.2.tgz#d08e39ce86f45ef8fc88549d29c62b8acf5649aa" - integrity sha512-4BlTqeutE/KnOiTG5Y6Sb/Hw6hsBOZapOVF6njAESHInhlQAghVVZL1ZpIctBOoTFbQyGW+LsVYZ8lSSB3wkjA== +"@esbuild/linux-mips64el@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.21.4.tgz#fec73cd39490a0c45d052bef03e011a0ad366c06" + integrity sha512-qtzAd3BJh7UdbiXCrg6npWLYU0YpufsV9XlufKhMhYMJGJCdfX/G6+PNd0+v877X1JG5VmjBLUiFB0o8EUSicA== "@esbuild/linux-ppc64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz#adb67dadb73656849f63cd522f5ecb351dd8dee8" integrity sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg== -"@esbuild/linux-ppc64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.20.2.tgz#8d252f0b7756ffd6d1cbde5ea67ff8fd20437f20" - integrity sha512-rD3KsaDprDcfajSKdn25ooz5J5/fWBylaaXkuotBDGnMnDP1Uv5DLAN/45qfnf3JDYyJv/ytGHQaziHUdyzaAg== +"@esbuild/linux-ppc64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.21.4.tgz#ea3b5e13b0fc8666bd4c6f7ea58bd1830f3e6e78" + integrity sha512-yB8AYzOTaL0D5+2a4xEy7OVvbcypvDR05MsB/VVPVA7nL4hc5w5Dyd/ddnayStDgJE59fAgNEOdLhBxjfx5+dg== "@esbuild/linux-riscv64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz#11bc0698bf0a2abf8727f1c7ace2112612c15adf" integrity sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg== -"@esbuild/linux-riscv64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.20.2.tgz#19f6dcdb14409dae607f66ca1181dd4e9db81300" - integrity sha512-snwmBKacKmwTMmhLlz/3aH1Q9T8v45bKYGE3j26TsaOVtjIag4wLfWSiZykXzXuE1kbCE+zJRmwp+ZbIHinnVg== +"@esbuild/linux-riscv64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.21.4.tgz#80d406f653fc6b193edaeb55ac88d4ac22c8f155" + integrity sha512-Y5AgOuVzPjQdgU59ramLoqSSiXddu7F3F+LI5hYy/d1UHN7K5oLzYBDZe23QmQJ9PIVUXwOdKJ/jZahPdxzm9w== "@esbuild/linux-s390x@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz#e86fb8ffba7c5c92ba91fc3b27ed5a70196c3cc8" integrity sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg== -"@esbuild/linux-s390x@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.20.2.tgz#3c830c90f1a5d7dd1473d5595ea4ebb920988685" - integrity sha512-wcWISOobRWNm3cezm5HOZcYz1sKoHLd8VL1dl309DiixxVFoFe/o8HnwuIwn6sXre88Nwj+VwZUvJf4AFxkyrQ== +"@esbuild/linux-s390x@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.21.4.tgz#9cbd26854b5b12cf22fb54c96cd1adffaf6ace6f" + integrity sha512-Iqc/l/FFwtt8FoTK9riYv9zQNms7B8u+vAI/rxKuN10HgQIXaPzKZc479lZ0x6+vKVQbu55GdpYpeNWzjOhgbA== "@esbuild/linux-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz#5f37cfdc705aea687dfe5dfbec086a05acfe9c78" integrity sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg== -"@esbuild/linux-x64@0.20.2": - version "0.20.2" - resolved 
"https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.20.2.tgz#86eca35203afc0d9de0694c64ec0ab0a378f6fff" - integrity sha512-1MdwI6OOTsfQfek8sLwgyjOXAu+wKhLEoaOLTjbijk6E2WONYpH9ZU2mNtR+lZ2B4uwr+usqGuVfFT9tMtGvGw== +"@esbuild/linux-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.21.4.tgz#44dfe1c5cad855362c830c604dba97fbb16fc114" + integrity sha512-Td9jv782UMAFsuLZINfUpoF5mZIbAj+jv1YVtE58rFtfvoKRiKSkRGQfHTgKamLVT/fO7203bHa3wU122V/Bdg== "@esbuild/netbsd-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz#29da566a75324e0d0dd7e47519ba2f7ef168657b" integrity sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA== -"@esbuild/netbsd-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.20.2.tgz#e771c8eb0e0f6e1877ffd4220036b98aed5915e6" - integrity sha512-K8/DhBxcVQkzYc43yJXDSyjlFeHQJBiowJ0uVL6Tor3jGQfSGHNNJcWxNbOI8v5k82prYqzPuwkzHt3J1T1iZQ== +"@esbuild/netbsd-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.21.4.tgz#89b97d823e1cc4bf8c4e5dc8f76c8d6ceb1c87f3" + integrity sha512-Awn38oSXxsPMQxaV0Ipb7W/gxZtk5Tx3+W+rAPdZkyEhQ6968r9NvtkjhnhbEgWXYbgV+JEONJ6PcdBS+nlcpA== "@esbuild/openbsd-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz#306c0acbdb5a99c95be98bdd1d47c916e7dc3ff0" integrity sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw== -"@esbuild/openbsd-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.20.2.tgz#9a795ae4b4e37e674f0f4d716f3e226dd7c39baf" - integrity sha512-eMpKlV0SThJmmJgiVyN9jTPJ2VBPquf6Kt/nAoo6DgHAoN57K15ZghiHaMvqjCye/uU4X5u3YSMgVBI1h3vKrQ== +"@esbuild/openbsd-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.21.4.tgz#080715bb4981c326364320d7b56835608e2bd98d" + integrity sha512-IsUmQeCY0aU374R82fxIPu6vkOybWIMc3hVGZ3ChRwL9hA1TwY+tS0lgFWV5+F1+1ssuvvXt3HFqe8roCip8Hg== "@esbuild/sunos-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz#0933eaab9af8b9b2c930236f62aae3fc593faf30" integrity sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA== -"@esbuild/sunos-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.20.2.tgz#7df23b61a497b8ac189def6e25a95673caedb03f" - integrity sha512-2UyFtRC6cXLyejf/YEld4Hajo7UHILetzE1vsRcGL3earZEW77JxrFjH4Ez2qaTiEfMgAXxfAZCm1fvM/G/o8w== +"@esbuild/sunos-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.21.4.tgz#8d838a8ac80e211536490108b72fb0091a811626" + integrity sha512-hsKhgZ4teLUaDA6FG/QIu2q0rI6I36tZVfM4DBZv3BG0mkMIdEnMbhc4xwLvLJSS22uWmaVkFkqWgIS0gPIm+A== "@esbuild/win32-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz#773bdbaa1971b36db2f6560088639ccd1e6773ae" integrity sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A== -"@esbuild/win32-arm64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.20.2.tgz#f1ae5abf9ca052ae11c1bc806fb4c0f519bacf90" - integrity 
sha512-GRibxoawM9ZCnDxnP3usoUDO9vUkpAxIIZ6GQI+IlVmr5kP3zUq+l17xELTHMWTWzjxa2guPNyrpq1GWmPvcGQ== +"@esbuild/win32-arm64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.21.4.tgz#94afb4c2ac89b0f09791606d6d93fdab322f81c8" + integrity sha512-UUfMgMoXPoA/bvGUNfUBFLCh0gt9dxZYIx9W4rfJr7+hKe5jxxHmfOK8YSH4qsHLLN4Ck8JZ+v7Q5fIm1huErg== "@esbuild/win32-ia32@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz#000516cad06354cc84a73f0943a4aa690ef6fd67" integrity sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ== -"@esbuild/win32-ia32@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.20.2.tgz#241fe62c34d8e8461cd708277813e1d0ba55ce23" - integrity sha512-HfLOfn9YWmkSKRQqovpnITazdtquEW8/SoHW7pWpuEeguaZI4QnCRW6b+oZTztdBnZOS2hqJ6im/D5cPzBTTlQ== +"@esbuild/win32-ia32@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.21.4.tgz#822085cd52f2f1dd90eabb59346ffa779c0bab83" + integrity sha512-yIxbspZb5kGCAHWm8dexALQ9en1IYDfErzjSEq1KzXFniHv019VT3mNtTK7t8qdy4TwT6QYHI9sEZabONHg+aw== "@esbuild/win32-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz#c57c8afbb4054a3ab8317591a0b7320360b444ae" integrity sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA== -"@esbuild/win32-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.20.2.tgz#9c907b21e30a52db959ba4f80bb01a0cc403d5cc" - integrity sha512-N49X4lJX27+l9jbLKSqZ6bKNjzQvHaT8IIFUy+YIqmXQdjYCToGWwOItDrfby14c78aDd5NHQl29xingXfCdLQ== +"@esbuild/win32-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.21.4.tgz#11ef0398f9abee161193461910a507ef0d4c0c32" + integrity sha512-sywLRD3UK/qRJt0oBwdpYLBibk7KiRfbswmWRDabuncQYSlf8aLEEUor/oP6KRz8KEG+HoiVLBhPRD5JWjS8Sg== "@eslint-community/eslint-utils@^4.2.0", "@eslint-community/eslint-utils@^4.4.0": version "4.4.0" @@ -614,6 +614,21 @@ minimatch "^3.1.2" strip-json-comments "^3.1.1" +"@eslint/eslintrc@^3.0.2": + version "3.1.0" + resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-3.1.0.tgz#dbd3482bfd91efa663cbe7aa1f506839868207b6" + integrity sha512-4Bfj15dVJdoy3RfZmmo86RK1Fwzn6SstsvK9JS+BaVKqC6QQQQyXekNaC+g+LKNgkQ+2VhGAzm6hO40AhMR3zQ== + dependencies: + ajv "^6.12.4" + debug "^4.3.2" + espree "^10.0.1" + globals "^14.0.0" + ignore "^5.2.0" + import-fresh "^3.2.1" + js-yaml "^4.1.0" + minimatch "^3.1.2" + strip-json-comments "^3.1.1" + "@eslint/js@8.57.0": version "8.57.0" resolved "https://registry.yarnpkg.com/@eslint/js/-/js-8.57.0.tgz#a5417ae8427873f1dd08b70b3574b453e67b5f7f" @@ -936,6 +951,26 @@ "@jridgewell/resolve-uri" "^3.1.0" "@jridgewell/sourcemap-codec" "^1.4.14" +"@jsonjoy.com/base64@^1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@jsonjoy.com/base64/-/base64-1.1.1.tgz#a717fd8840f7bad49c7fe66cc65db8bcfc4c4dc5" + integrity sha512-LnFjVChaGY8cZVMwAIMjvA1XwQjZ/zIXHyh28IyJkyNkzof4Dkm1+KN9UIm3lHhREH4vs7XwZ0NpkZKnwOtEfg== + +"@jsonjoy.com/json-pack@^1.0.3": + version "1.0.3" + resolved "https://registry.yarnpkg.com/@jsonjoy.com/json-pack/-/json-pack-1.0.3.tgz#a68cbe3ccfd85d26cd763e4175fe90c9ee383d33" + integrity sha512-Q0SPAdmK6s5Fe3e1kcNvwNyk6e2+CxM8XZdGbf4abZG7nUO05KSie3/iX29loTBuY+75uVP6RixDSPVpotfzmQ== + dependencies: + 
"@jsonjoy.com/base64" "^1.1.1" + "@jsonjoy.com/util" "^1.1.2" + hyperdyperid "^1.2.0" + thingies "^1.20.0" + +"@jsonjoy.com/util@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@jsonjoy.com/util/-/util-1.1.2.tgz#5072c27ecdb16d1ed7a2d125a1d0ed8aba01d652" + integrity sha512-HOGa9wtE6LEz2I5mMQ2pMSjth85PmD71kPbsecs02nEUq3/Kw0wRK3gmZn5BCEB8mFLXByqPxjHgApoMwIPMKQ== + "@nodelib/fs.scandir@2.1.5": version "2.1.5" resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" @@ -1000,85 +1035,85 @@ estree-walker "^2.0.2" picomatch "^2.3.1" -"@rollup/rollup-android-arm-eabi@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.14.3.tgz#bddf05c3387d02fac04b6b86b3a779337edfed75" - integrity sha512-X9alQ3XM6I9IlSlmC8ddAvMSyG1WuHk5oUnXGw+yUBs3BFoTizmG1La/Gr8fVJvDWAq+zlYTZ9DBgrlKRVY06g== - -"@rollup/rollup-android-arm64@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.14.3.tgz#b26bd09de58704c0a45e3375b76796f6eda825e4" - integrity sha512-eQK5JIi+POhFpzk+LnjKIy4Ks+pwJ+NXmPxOCSvOKSNRPONzKuUvWE+P9JxGZVxrtzm6BAYMaL50FFuPe0oWMQ== - -"@rollup/rollup-darwin-arm64@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.14.3.tgz#c5f3fd1aa285b6d33dda6e3f3ca395f8c37fd5ca" - integrity sha512-Od4vE6f6CTT53yM1jgcLqNfItTsLt5zE46fdPaEmeFHvPs5SjZYlLpHrSiHEKR1+HdRfxuzXHjDOIxQyC3ptBA== - -"@rollup/rollup-darwin-x64@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.14.3.tgz#8e4673734d7dc9d68f6d48e81246055cda0e840f" - integrity sha512-0IMAO21axJeNIrvS9lSe/PGthc8ZUS+zC53O0VhF5gMxfmcKAP4ESkKOCwEi6u2asUrt4mQv2rjY8QseIEb1aw== - -"@rollup/rollup-linux-arm-gnueabihf@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.14.3.tgz#53ed38eb13b58ababdb55a7f66f0538a7f85dcba" - integrity sha512-ge2DC7tHRHa3caVEoSbPRJpq7azhG+xYsd6u2MEnJ6XzPSzQsTKyXvh6iWjXRf7Rt9ykIUWHtl0Uz3T6yXPpKw== - -"@rollup/rollup-linux-arm-musleabihf@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.14.3.tgz#0706ee38330e267a5c9326956820f009cfb21fcd" - integrity sha512-ljcuiDI4V3ySuc7eSk4lQ9wU8J8r8KrOUvB2U+TtK0TiW6OFDmJ+DdIjjwZHIw9CNxzbmXY39wwpzYuFDwNXuw== - -"@rollup/rollup-linux-arm64-gnu@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.14.3.tgz#426fce7b8b242ac5abd48a10a5020f5a468c6cb4" - integrity sha512-Eci2us9VTHm1eSyn5/eEpaC7eP/mp5n46gTRB3Aar3BgSvDQGJZuicyq6TsH4HngNBgVqC5sDYxOzTExSU+NjA== - -"@rollup/rollup-linux-arm64-musl@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.14.3.tgz#65bf944530d759b50d7ffd00dfbdf4125a43406f" - integrity sha512-UrBoMLCq4E92/LCqlh+blpqMz5h1tJttPIniwUgOFJyjWI1qrtrDhhpHPuFxULlUmjFHfloWdixtDhSxJt5iKw== - -"@rollup/rollup-linux-powerpc64le-gnu@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.14.3.tgz#494ba3b31095e9a45df9c3f646d21400fb631a95" - integrity sha512-5aRjvsS8q1nWN8AoRfrq5+9IflC3P1leMoy4r2WjXyFqf3qcqsxRCfxtZIV58tCxd+Yv7WELPcO9mY9aeQyAmw== - 
-"@rollup/rollup-linux-riscv64-gnu@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.14.3.tgz#8b88ed0a40724cce04aa15374ebe5ba4092d679f" - integrity sha512-sk/Qh1j2/RJSX7FhEpJn8n0ndxy/uf0kI/9Zc4b1ELhqULVdTfN6HL31CDaTChiBAOgLcsJ1sgVZjWv8XNEsAQ== - -"@rollup/rollup-linux-s390x-gnu@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.14.3.tgz#09c9e5ec57a0f6ec3551272c860bb9a04b96d70f" - integrity sha512-jOO/PEaDitOmY9TgkxF/TQIjXySQe5KVYB57H/8LRP/ux0ZoO8cSHCX17asMSv3ruwslXW/TLBcxyaUzGRHcqg== - -"@rollup/rollup-linux-x64-gnu@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.14.3.tgz#197f27fd481ad9c861021d5cbbf21793922a631c" - integrity sha512-8ybV4Xjy59xLMyWo3GCfEGqtKV5M5gCSrZlxkPGvEPCGDLNla7v48S662HSGwRd6/2cSneMQWiv+QzcttLrrOA== - -"@rollup/rollup-linux-x64-musl@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.14.3.tgz#5cc0522f4942f2df625e9bfb6fb02c6580ffbce6" - integrity sha512-s+xf1I46trOY10OqAtZ5Rm6lzHre/UiLA1J2uOhCFXWkbZrJRkYBPO6FhvGfHmdtQ3Bx793MNa7LvoWFAm93bg== - -"@rollup/rollup-win32-arm64-msvc@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.14.3.tgz#a648122389d23a7543b261fba082e65fefefe4f6" - integrity sha512-+4h2WrGOYsOumDQ5S2sYNyhVfrue+9tc9XcLWLh+Kw3UOxAvrfOrSMFon60KspcDdytkNDh7K2Vs6eMaYImAZg== - -"@rollup/rollup-win32-ia32-msvc@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.14.3.tgz#34727b5c7953c35fc6e1ae4f770ad3a2025f8e03" - integrity sha512-T1l7y/bCeL/kUwh9OD4PQT4aM7Bq43vX05htPJJ46RTI4r5KNt6qJRzAfNfM+OYMNEVBWQzR2Gyk+FXLZfogGw== - -"@rollup/rollup-win32-x64-msvc@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.14.3.tgz#5b2fb4d8cd44c05deef8a7b0e6deb9ccb8939d18" - integrity sha512-/BypzV0H1y1HzgYpxqRaXGBRqfodgoBBCcsrujT6QRcakDQdfU+Lq9PENPh5jB4I44YWq+0C2eHsHya+nZY1sA== +"@rollup/rollup-android-arm-eabi@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.18.0.tgz#bbd0e616b2078cd2d68afc9824d1fadb2f2ffd27" + integrity sha512-Tya6xypR10giZV1XzxmH5wr25VcZSncG0pZIjfePT0OVBvqNEurzValetGNarVrGiq66EBVAFn15iYX4w6FKgQ== + +"@rollup/rollup-android-arm64@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.18.0.tgz#97255ef6384c5f73f4800c0de91f5f6518e21203" + integrity sha512-avCea0RAP03lTsDhEyfy+hpfr85KfyTctMADqHVhLAF3MlIkq83CP8UfAHUssgXTYd+6er6PaAhx/QGv4L1EiA== + +"@rollup/rollup-darwin-arm64@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.18.0.tgz#b6dd74e117510dfe94541646067b0545b42ff096" + integrity sha512-IWfdwU7KDSm07Ty0PuA/W2JYoZ4iTj3TUQjkVsO/6U+4I1jN5lcR71ZEvRh52sDOERdnNhhHU57UITXz5jC1/w== + +"@rollup/rollup-darwin-x64@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.18.0.tgz#e07d76de1cec987673e7f3d48ccb8e106d42c05c" + integrity sha512-n2LMsUz7Ynu7DoQrSQkBf8iNrjOGyPLrdSg802vk6XT3FtsgX6JbE8IHRvposskFm9SNxzkLYGSq9QdpLYpRNA== + +"@rollup/rollup-linux-arm-gnueabihf@4.18.0": + version 
"4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.18.0.tgz#9f1a6d218b560c9d75185af4b8bb42f9f24736b8" + integrity sha512-C/zbRYRXFjWvz9Z4haRxcTdnkPt1BtCkz+7RtBSuNmKzMzp3ZxdM28Mpccn6pt28/UWUCTXa+b0Mx1k3g6NOMA== + +"@rollup/rollup-linux-arm-musleabihf@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.18.0.tgz#53618b92e6ffb642c7b620e6e528446511330549" + integrity sha512-l3m9ewPgjQSXrUMHg93vt0hYCGnrMOcUpTz6FLtbwljo2HluS4zTXFy2571YQbisTnfTKPZ01u/ukJdQTLGh9A== + +"@rollup/rollup-linux-arm64-gnu@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.18.0.tgz#99a7ba5e719d4f053761a698f7b52291cefba577" + integrity sha512-rJ5D47d8WD7J+7STKdCUAgmQk49xuFrRi9pZkWoRD1UeSMakbcepWXPF8ycChBoAqs1pb2wzvbY6Q33WmN2ftw== + +"@rollup/rollup-linux-arm64-musl@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.18.0.tgz#f53db99a45d9bc00ce94db8a35efa7c3c144a58c" + integrity sha512-be6Yx37b24ZwxQ+wOQXXLZqpq4jTckJhtGlWGZs68TgdKXJgw54lUUoFYrg6Zs/kjzAQwEwYbp8JxZVzZLRepQ== + +"@rollup/rollup-linux-powerpc64le-gnu@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.18.0.tgz#cbb0837408fe081ce3435cf3730e090febafc9bf" + integrity sha512-hNVMQK+qrA9Todu9+wqrXOHxFiD5YmdEi3paj6vP02Kx1hjd2LLYR2eaN7DsEshg09+9uzWi2W18MJDlG0cxJA== + +"@rollup/rollup-linux-riscv64-gnu@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.18.0.tgz#8ed09c1d1262ada4c38d791a28ae0fea28b80cc9" + integrity sha512-ROCM7i+m1NfdrsmvwSzoxp9HFtmKGHEqu5NNDiZWQtXLA8S5HBCkVvKAxJ8U+CVctHwV2Gb5VUaK7UAkzhDjlg== + +"@rollup/rollup-linux-s390x-gnu@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.18.0.tgz#938138d3c8e0c96f022252a28441dcfb17afd7ec" + integrity sha512-0UyyRHyDN42QL+NbqevXIIUnKA47A+45WyasO+y2bGJ1mhQrfrtXUpTxCOrfxCR4esV3/RLYyucGVPiUsO8xjg== + +"@rollup/rollup-linux-x64-gnu@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.18.0.tgz#1a7481137a54740bee1ded4ae5752450f155d942" + integrity sha512-xuglR2rBVHA5UsI8h8UbX4VJ470PtGCf5Vpswh7p2ukaqBGFTnsfzxUBetoWBWymHMxbIG0Cmx7Y9qDZzr648w== + +"@rollup/rollup-linux-x64-musl@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.18.0.tgz#f1186afc601ac4f4fc25fac4ca15ecbee3a1874d" + integrity sha512-LKaqQL9osY/ir2geuLVvRRs+utWUNilzdE90TpyoX0eNqPzWjRm14oMEE+YLve4k/NAqCdPkGYDaDF5Sw+xBfg== + +"@rollup/rollup-win32-arm64-msvc@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.18.0.tgz#ed6603e93636a96203c6915be4117245c1bd2daf" + integrity sha512-7J6TkZQFGo9qBKH0pk2cEVSRhJbL6MtfWxth7Y5YmZs57Pi+4x6c2dStAUvaQkHQLnEQv1jzBUW43GvZW8OFqA== + +"@rollup/rollup-win32-ia32-msvc@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.18.0.tgz#14e0b404b1c25ebe6157a15edb9c46959ba74c54" + integrity sha512-Txjh+IxBPbkUB9+SXZMpv+b/vnTEtFyfWZgJ6iyCmt2tdx0OF5WhFowLmnh8ENGNpfUlUZkdI//4IEmhwPieNg== + 
+"@rollup/rollup-win32-x64-msvc@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.18.0.tgz#5d694d345ce36b6ecf657349e03eb87297e68da4" + integrity sha512-UOo5FdvOL0+eIVTgS4tIdbW+TtnBLWg1YBCcU2KWM7nuNwRz9bksDX1bekJJCpu25N1DVWaCwnT39dVQxzqS8g== "@rollup/stream@3.0.1": version "3.0.1" @@ -1104,91 +1139,91 @@ dependencies: "@sinonjs/commons" "^3.0.0" -"@swc/core-darwin-arm64@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-darwin-arm64/-/core-darwin-arm64-1.4.14.tgz#de570252c3f155f55536f0d6bb8bafaec2e99616" - integrity sha512-8iPfLhYNspBl836YYsfv6ErXwDUqJ7IMieddV3Ey/t/97JAEAdNDUdtTKDtbyP0j/Ebyqyn+fKcqwSq7rAof0g== - -"@swc/core-darwin-x64@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-darwin-x64/-/core-darwin-x64-1.4.14.tgz#4eefbe129e416f4c400656742ab7f30e01aff02e" - integrity sha512-9CqSj8uRZ92cnlgAlVaWMaJJBdxtNvCzJxaGj5KuIseeG6Q0l1g+qk8JcU7h9dAsH9saHTNwNFBVGKQo0W0ujg== - -"@swc/core-linux-arm-gnueabihf@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.4.14.tgz#bea4b94c32bb25de2816126dac299655529ba7f3" - integrity sha512-mfd5JArPITTzMjcezH4DwMw+BdjBV1y25Khp8itEIpdih9ei+fvxOOrDYTN08b466NuE2dF2XuhKtRLA7fXArQ== - -"@swc/core-linux-arm64-gnu@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.4.14.tgz#52063214f4a14d6a0c3c6059ed9e7ba1062f6b46" - integrity sha512-3Lqlhlmy8MVRS9xTShMaPAp0oyUt0KFhDs4ixJsjdxKecE0NJSV/MInuDmrkij1C8/RQ2wySRlV9np5jK86oWw== - -"@swc/core-linux-arm64-musl@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.4.14.tgz#7e7deea7b1b3d0c9944cc8e9ba948fcc785158ea" - integrity sha512-n0YoCa64TUcJrbcXIHIHDWQjdUPdaXeMHNEu7yyBtOpm01oMGTKP3frsUXIABLBmAVWtKvqit4/W1KVKn5gJzg== - -"@swc/core-linux-x64-gnu@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.4.14.tgz#301133ea3ee347568886f2489837e991e96d44db" - integrity sha512-CGmlwLWbfG1dB4jZBJnp2IWlK5xBMNLjN7AR5kKA3sEpionoccEnChOEvfux1UdVJQjLRKuHNV9yGyqGBTpxfQ== - -"@swc/core-linux-x64-musl@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.4.14.tgz#86b8e987a814209cd0dd0f21cbc1134305dfffd5" - integrity sha512-xq4npk8YKYmNwmr8fbvF2KP3kUVdZYfXZMQnW425gP3/sn+yFQO8Nd0bGH40vOVQn41kEesSe0Z5O/JDor2TgQ== - -"@swc/core-win32-arm64-msvc@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.4.14.tgz#eb56b8977e3542665929c3963bd7dc18fe5b2556" - integrity sha512-imq0X+gU9uUe6FqzOQot5gpKoaC00aCUiN58NOzwp0QXEupn8CDuZpdBN93HiZswfLruu5jA1tsc15x6v9p0Yg== - -"@swc/core-win32-ia32-msvc@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.4.14.tgz#72e119038b9d8743b13bb933b8e192acd9f501f9" - integrity sha512-cH6QpXMw5D3t+lpx6SkErHrxN0yFzmQ0lgNAJxoDRiaAdDbqA6Col8UqUJwUS++Ul6aCWgNhCdiEYehPaoyDPA== - -"@swc/core-win32-x64-msvc@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.4.14.tgz#f5a3b1a241708b0628a07458e5bedbf67a1b9595" - integrity sha512-FmZ4Tby4wW65K/36BKzmuu7mlq7cW5XOxzvufaSNVvQ5PN4OodAlqPjToe029oma4Av+ykJiif64scMttyNAzg== - -"@swc/core@1.4.14": - version "1.4.14" - resolved 
"https://registry.yarnpkg.com/@swc/core/-/core-1.4.14.tgz#8bad316c0119f626bb1b181ba7a988ef9d14e9cc" - integrity sha512-tHXg6OxboUsqa/L7DpsCcFnxhLkqN/ht5pCwav1HnvfthbiNIJypr86rNx4cUnQDJepETviSqBTIjxa7pSpGDQ== - dependencies: - "@swc/counter" "^0.1.2" - "@swc/types" "^0.1.5" +"@swc/core-darwin-arm64@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-darwin-arm64/-/core-darwin-arm64-1.5.24.tgz#71875695bc617e57c2d93352f48317b4c41e0240" + integrity sha512-M7oLOcC0sw+UTyAuL/9uyB9GeO4ZpaBbH76JSH6g1m0/yg7LYJZGRmplhDmwVSDAR5Fq4Sjoi1CksmmGkgihGA== + +"@swc/core-darwin-x64@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-darwin-x64/-/core-darwin-x64-1.5.24.tgz#6b4c3eb9b21ab50b7324a82c9497ffeb2e8e0a57" + integrity sha512-MfcFjGGYognpSBSos2pYUNYJSmqEhuw5ceGr6qAdME7ddbjGXliza4W6FggsM+JnWwpqa31+e7/R+GetW4WkaQ== + +"@swc/core-linux-arm-gnueabihf@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.5.24.tgz#5730ed6ad86afe4ee8df04ee6f21430daead186c" + integrity sha512-amI2pwtcWV3E/m/nf+AQtn1LWDzKLZyjCmWd3ms7QjEueWYrY8cU1Y4Wp7wNNsxIoPOi8zek1Uj2wwFD/pttNQ== + +"@swc/core-linux-arm64-gnu@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.5.24.tgz#0a2478e8601391aa88f82bfece1dbc60d27cbcfd" + integrity sha512-sTSvmqMmgT1ynH/nP75Pc51s+iT4crZagHBiDOf5cq+kudUYjda9lWMs7xkXB/TUKFHPCRK0HGunl8bkwiIbuw== + +"@swc/core-linux-arm64-musl@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.5.24.tgz#e0199092dc611ca75f8a92dcea17de44e38f3fbf" + integrity sha512-vd2/hfOBGbrX21FxsFdXCUaffjkHvlZkeE2UMRajdXifwv79jqOHIJg3jXG1F3ZrhCghCzirFts4tAZgcG8XWg== + +"@swc/core-linux-x64-gnu@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.5.24.tgz#1fe347c9f28457c593f2fda5b0d4904a2b105ecd" + integrity sha512-Zrdzi7NqzQxm2BvAG5KyOSBEggQ7ayrxh599AqqevJmsUXJ8o2nMiWQOBvgCGp7ye+Biz3pvZn1EnRzAp+TpUg== + +"@swc/core-linux-x64-musl@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.5.24.tgz#bf6ac583fac211d704d2d78cfd0b7bf751268f5e" + integrity sha512-1F8z9NRi52jdZQCGc5sflwYSctL6omxiVmIFVp8TC9nngjQKc00TtX/JC2Eo2HwvgupkFVl5YQJidAck9YtmJw== + +"@swc/core-win32-arm64-msvc@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.5.24.tgz#41b9faf4db69cc08a43c3a176df2a7b94d765637" + integrity sha512-cKpP7KvS6Xr0jFSTBXY53HZX/YfomK5EMQYpCVDOvfsZeYHN20sQSKXfpVLvA/q2igVt1zzy1XJcOhpJcgiKLg== + +"@swc/core-win32-ia32-msvc@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.5.24.tgz#e123ad00e3b28d567d3851a86697fb3c54ed817a" + integrity sha512-IoPWfi0iwqjZuf7gE223+B97/ZwkKbu7qL5KzGP7g3hJrGSKAvv7eC5Y9r2iKKtLKyv5R/T6Ho0kFR/usi7rHw== + +"@swc/core-win32-x64-msvc@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.5.24.tgz#21fb87b1981253039e6d45255e31a875f446e397" + integrity sha512-zHgF2k1uVJL8KIW+PnVz1To4a3Cz9THbh2z2lbehaF/gKHugH4c3djBozU4das1v35KOqf5jWIEviBLql2wDLQ== + +"@swc/core@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core/-/core-1.5.24.tgz#9ecb4601cb6a4fb19f227ec5fb59d07e23347dca" + integrity 
sha512-Eph9zvO4xvqWZGVzTdtdEJ0Vqf0VIML/o/e4Qd2RLOqtfgnlRi7avmMu5C0oqciJ0tk+hqdUKVUZ4JPoPaiGvQ== + dependencies: + "@swc/counter" "^0.1.3" + "@swc/types" "^0.1.7" optionalDependencies: - "@swc/core-darwin-arm64" "1.4.14" - "@swc/core-darwin-x64" "1.4.14" - "@swc/core-linux-arm-gnueabihf" "1.4.14" - "@swc/core-linux-arm64-gnu" "1.4.14" - "@swc/core-linux-arm64-musl" "1.4.14" - "@swc/core-linux-x64-gnu" "1.4.14" - "@swc/core-linux-x64-musl" "1.4.14" - "@swc/core-win32-arm64-msvc" "1.4.14" - "@swc/core-win32-ia32-msvc" "1.4.14" - "@swc/core-win32-x64-msvc" "1.4.14" - -"@swc/counter@^0.1.2", "@swc/counter@^0.1.3": + "@swc/core-darwin-arm64" "1.5.24" + "@swc/core-darwin-x64" "1.5.24" + "@swc/core-linux-arm-gnueabihf" "1.5.24" + "@swc/core-linux-arm64-gnu" "1.5.24" + "@swc/core-linux-arm64-musl" "1.5.24" + "@swc/core-linux-x64-gnu" "1.5.24" + "@swc/core-linux-x64-musl" "1.5.24" + "@swc/core-win32-arm64-msvc" "1.5.24" + "@swc/core-win32-ia32-msvc" "1.5.24" + "@swc/core-win32-x64-msvc" "1.5.24" + +"@swc/counter@^0.1.3": version "0.1.3" resolved "https://registry.yarnpkg.com/@swc/counter/-/counter-0.1.3.tgz#cc7463bd02949611c6329596fccd2b0ec782b0e9" integrity sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ== -"@swc/helpers@^0.5.10": - version "0.5.10" - resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.10.tgz#5720082d007197cd85743dd599198097126a3f6e" - integrity sha512-CU+RF9FySljn7HVSkkjiB84hWkvTaI3rtLvF433+jRSBL2hMu3zX5bGhHS8C80SM++h4xy8hBSnUHFQHmRXSBw== +"@swc/helpers@^0.5.11": + version "0.5.11" + resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.11.tgz#5bab8c660a6e23c13b2d23fcd1ee44a2db1b0cb7" + integrity sha512-YNlnKRWF2sVojTpIyzwou9XoTNbzbzONwRhOoniEioF1AtaitTvVZblaQRrAzChWQ1bLYyYSWzM18y4WwgzJ+A== dependencies: tslib "^2.4.0" -"@swc/types@^0.1.5": - version "0.1.6" - resolved "https://registry.yarnpkg.com/@swc/types/-/types-0.1.6.tgz#2f13f748995b247d146de2784d3eb7195410faba" - integrity sha512-/JLo/l2JsT/LRd80C3HfbmVpxOAJ11FO2RCEslFrgzLltoP9j8XIbsyDcfCt2WWyX+CM96rBoNM+IToAkFOugg== +"@swc/types@^0.1.7": + version "0.1.7" + resolved "https://registry.yarnpkg.com/@swc/types/-/types-0.1.7.tgz#ea5d658cf460abff51507ca8d26e2d391bafb15e" + integrity sha512-scHWahbHF0eyj3JsxG9CFJgFdFNaVQCNAimBlT6PzS3n/HptxqREjsm4OH6AN3lYcffZYSPxXW8ua2BEHp0lJQ== dependencies: "@swc/counter" "^0.1.3" @@ -1328,7 +1363,7 @@ expect "^29.0.0" pretty-format "^29.0.0" -"@types/json-schema@*", "@types/json-schema@^7.0.15", "@types/json-schema@^7.0.8", "@types/json-schema@^7.0.9": +"@types/json-schema@*", "@types/json-schema@^7.0.8": version "7.0.15" resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841" integrity sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA== @@ -1350,22 +1385,10 @@ dependencies: "@types/node" "*" -"@types/node@*": - version "20.12.3" - resolved "https://registry.yarnpkg.com/@types/node/-/node-20.12.3.tgz#d6658c2c7776c1cad93534bb45428195ed840c65" - integrity sha512-sD+ia2ubTeWrOu+YMF+MTAB7E+O7qsMqAbMfW7DG3K1URwhZ5hN1pLlRVGbf4wDFzSfikL05M17EyorS86jShw== - dependencies: - undici-types "~5.26.4" - -"@types/node@^13.7.4": - version "13.13.52" - resolved "https://registry.yarnpkg.com/@types/node/-/node-13.13.52.tgz#03c13be70b9031baaed79481c0c0cfb0045e53f7" - integrity sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ== - -"@types/node@^20.12.7": - version "20.12.7" - resolved 
"https://registry.yarnpkg.com/@types/node/-/node-20.12.7.tgz#04080362fa3dd6c5822061aa3124f5c152cff384" - integrity sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg== +"@types/node@*", "@types/node@>=13.7.4", "@types/node@^20.13.0": + version "20.14.1" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.14.1.tgz#2434dbcb1f039e31f2c0e9969da93f52cf6348f3" + integrity sha512-T2MzSGEu+ysB/FkWfqmhV3PLyQlowdptmmgD20C6QxsS8Fmv5SjpZ1ayXaEC0S21/h5UJ9iA6W/5vSNU5l00OA== dependencies: undici-types "~5.26.4" @@ -1379,11 +1402,6 @@ resolved "https://registry.yarnpkg.com/@types/resolve/-/resolve-1.20.2.tgz#97d26e00cd4a0423b4af620abecf3e6f442b7975" integrity sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q== -"@types/semver@^7.3.12", "@types/semver@^7.5.8": - version "7.5.8" - resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.5.8.tgz#8268a8c57a3e4abd25c165ecd36237db7948a55e" - integrity sha512-I8EUhyrgfLrcTkzV3TSsGyl1tSuPrEDzr0yd5m90UgNxQkyDXULk3b6MlQqTCpZpNtWe1K0hzclnZkTcLBe2UQ== - "@types/stack-utils@^2.0.0": version "2.0.3" resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-2.0.3.tgz#6209321eb2c1712a7e7466422b8cb1fc0d9dd5d8" @@ -1416,90 +1434,62 @@ dependencies: "@types/yargs-parser" "*" -"@typescript-eslint/eslint-plugin@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.7.0.tgz#bf34a02f221811505b8bf2f31060c8560c1bb0a3" - integrity sha512-GJWR0YnfrKnsRoluVO3PRb9r5aMZriiMMM/RHj5nnTrBy1/wIgk76XCtCKcnXGjpZQJQRFtGV9/0JJ6n30uwpQ== +"@typescript-eslint/eslint-plugin@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.12.0.tgz#f87a32e8972b8a60024f2f8f12205e7c8108bc41" + integrity sha512-7F91fcbuDf/d3S8o21+r3ZncGIke/+eWk0EpO21LXhDfLahriZF9CGj4fbAetEjlaBdjdSm9a6VeXbpbT6Z40Q== dependencies: "@eslint-community/regexpp" "^4.10.0" - "@typescript-eslint/scope-manager" "7.7.0" - "@typescript-eslint/type-utils" "7.7.0" - "@typescript-eslint/utils" "7.7.0" - "@typescript-eslint/visitor-keys" "7.7.0" - debug "^4.3.4" + "@typescript-eslint/scope-manager" "7.12.0" + "@typescript-eslint/type-utils" "7.12.0" + "@typescript-eslint/utils" "7.12.0" + "@typescript-eslint/visitor-keys" "7.12.0" graphemer "^1.4.0" ignore "^5.3.1" natural-compare "^1.4.0" - semver "^7.6.0" ts-api-utils "^1.3.0" -"@typescript-eslint/parser@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-7.7.0.tgz#6b1b3ce76c5de002c43af8ae933613b0f2b4bcc6" - integrity sha512-fNcDm3wSwVM8QYL4HKVBggdIPAy9Q41vcvC/GtDobw3c4ndVT3K6cqudUmjHPw8EAp4ufax0o58/xvWaP2FmTg== +"@typescript-eslint/parser@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-7.12.0.tgz#8761df3345528b35049353db80010b385719b1c3" + integrity sha512-dm/J2UDY3oV3TKius2OUZIFHsomQmpHtsV0FTh1WO8EKgHLQ1QCADUqscPgTpU+ih1e21FQSRjXckHn3txn6kQ== dependencies: - "@typescript-eslint/scope-manager" "7.7.0" - "@typescript-eslint/types" "7.7.0" - "@typescript-eslint/typescript-estree" "7.7.0" - "@typescript-eslint/visitor-keys" "7.7.0" + "@typescript-eslint/scope-manager" "7.12.0" + "@typescript-eslint/types" "7.12.0" + "@typescript-eslint/typescript-estree" "7.12.0" + "@typescript-eslint/visitor-keys" "7.12.0" debug "^4.3.4" -"@typescript-eslint/scope-manager@5.62.0": - version "5.62.0" - resolved 
"https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-5.62.0.tgz#d9457ccc6a0b8d6b37d0eb252a23022478c5460c" - integrity sha512-VXuvVvZeQCQb5Zgf4HAxc04q5j+WrNAtNh9OwCsCgpKqESMTu3tF/jhZ3xG6T4NZwWl65Bg8KuS2uEvhSfLl0w== +"@typescript-eslint/scope-manager@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.12.0.tgz#259c014362de72dd34f995efe6bd8dda486adf58" + integrity sha512-itF1pTnN6F3unPak+kutH9raIkL3lhH1YRPGgt7QQOh43DQKVJXmWkpb+vpc/TiDHs6RSd9CTbDsc/Y+Ygq7kg== dependencies: - "@typescript-eslint/types" "5.62.0" - "@typescript-eslint/visitor-keys" "5.62.0" + "@typescript-eslint/types" "7.12.0" + "@typescript-eslint/visitor-keys" "7.12.0" -"@typescript-eslint/scope-manager@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.7.0.tgz#3f0db079b275bb8b0cb5be7613fb3130cfb5de77" - integrity sha512-/8INDn0YLInbe9Wt7dK4cXLDYp0fNHP5xKLHvZl3mOT5X17rK/YShXaiNmorl+/U4VKCVIjJnx4Ri5b0y+HClw== +"@typescript-eslint/type-utils@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.12.0.tgz#9dfaaa1972952f395ec5be4f5bbfc4d3cdc63908" + integrity sha512-lib96tyRtMhLxwauDWUp/uW3FMhLA6D0rJ8T7HmH7x23Gk1Gwwu8UZ94NMXBvOELn6flSPiBrCKlehkiXyaqwA== dependencies: - "@typescript-eslint/types" "7.7.0" - "@typescript-eslint/visitor-keys" "7.7.0" - -"@typescript-eslint/type-utils@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.7.0.tgz#36792ff4209a781b058de61631a48df17bdefbc5" - integrity sha512-bOp3ejoRYrhAlnT/bozNQi3nio9tIgv3U5C0mVDdZC7cpcQEDZXvq8inrHYghLVwuNABRqrMW5tzAv88Vy77Sg== - dependencies: - "@typescript-eslint/typescript-estree" "7.7.0" - "@typescript-eslint/utils" "7.7.0" + "@typescript-eslint/typescript-estree" "7.12.0" + "@typescript-eslint/utils" "7.12.0" debug "^4.3.4" ts-api-utils "^1.3.0" -"@typescript-eslint/types@5.62.0": - version "5.62.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-5.62.0.tgz#258607e60effa309f067608931c3df6fed41fd2f" - integrity sha512-87NVngcbVXUahrRTqIK27gD2t5Cu1yuCXxbLcFtCzZGlfyVWWh8mLHkoxzjsB6DDNnvdL+fW8MiwPEJyGJQDgQ== - -"@typescript-eslint/types@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.7.0.tgz#23af4d24bf9ce15d8d301236e3e3014143604f27" - integrity sha512-G01YPZ1Bd2hn+KPpIbrAhEWOn5lQBrjxkzHkWvP6NucMXFtfXoevK82hzQdpfuQYuhkvFDeQYbzXCjR1z9Z03w== +"@typescript-eslint/types@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.12.0.tgz#bf208f971a8da1e7524a5d9ae2b5f15192a37981" + integrity sha512-o+0Te6eWp2ppKY3mLCU+YA9pVJxhUJE15FV7kxuD9jgwIAa+w/ycGJBMrYDTpVGUM/tgpa9SeMOugSabWFq7bg== -"@typescript-eslint/typescript-estree@5.62.0": - version "5.62.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-5.62.0.tgz#7d17794b77fabcac615d6a48fb143330d962eb9b" - integrity sha512-CmcQ6uY7b9y694lKdRB8FEel7JbU/40iSAPomu++SjLMntB+2Leay2LO6i8VnJk58MtE9/nQSFIH6jpyRWyYzA== +"@typescript-eslint/typescript-estree@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.12.0.tgz#e6c1074f248b3db6573ab6a7c47a39c4cd498ff9" + integrity sha512-5bwqLsWBULv1h6pn7cMW5dXX/Y2amRqLaKqsASVwbBHMZSnHqE/HN4vT4fE0aFsiwxYvr98kqOWh1a8ZKXalCQ== dependencies: - "@typescript-eslint/types" "5.62.0" - 
"@typescript-eslint/visitor-keys" "5.62.0" - debug "^4.3.4" - globby "^11.1.0" - is-glob "^4.0.3" - semver "^7.3.7" - tsutils "^3.21.0" - -"@typescript-eslint/typescript-estree@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.7.0.tgz#b5dd6383b4c6a852d7b256a37af971e8982be97f" - integrity sha512-8p71HQPE6CbxIBy2kWHqM1KGrC07pk6RJn40n0DSc6bMOBBREZxSDJ+BmRzc8B5OdaMh1ty3mkuWRg4sCFiDQQ== - dependencies: - "@typescript-eslint/types" "7.7.0" - "@typescript-eslint/visitor-keys" "7.7.0" + "@typescript-eslint/types" "7.12.0" + "@typescript-eslint/visitor-keys" "7.12.0" debug "^4.3.4" globby "^11.1.0" is-glob "^4.0.3" @@ -1507,47 +1497,22 @@ semver "^7.6.0" ts-api-utils "^1.3.0" -"@typescript-eslint/utils@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.7.0.tgz#3d2b6606a60ac34f3c625facfb3b3ab7e126f58d" - integrity sha512-LKGAXMPQs8U/zMRFXDZOzmMKgFv3COlxUQ+2NMPhbqgVm6R1w+nU1i4836Pmxu9jZAuIeyySNrN/6Rc657ggig== +"@typescript-eslint/utils@7.12.0", "@typescript-eslint/utils@^6.0.0 || ^7.0.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.12.0.tgz#c6e58fd7f724cdccc848f71e388ad80cbdb95dd0" + integrity sha512-Y6hhwxwDx41HNpjuYswYp6gDbkiZ8Hin9Bf5aJQn1bpTs3afYY4GX+MPYxma8jtoIV2GRwTM/UJm/2uGCVv+DQ== dependencies: "@eslint-community/eslint-utils" "^4.4.0" - "@types/json-schema" "^7.0.15" - "@types/semver" "^7.5.8" - "@typescript-eslint/scope-manager" "7.7.0" - "@typescript-eslint/types" "7.7.0" - "@typescript-eslint/typescript-estree" "7.7.0" - semver "^7.6.0" + "@typescript-eslint/scope-manager" "7.12.0" + "@typescript-eslint/types" "7.12.0" + "@typescript-eslint/typescript-estree" "7.12.0" -"@typescript-eslint/utils@^5.10.0": - version "5.62.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-5.62.0.tgz#141e809c71636e4a75daa39faed2fb5f4b10df86" - integrity sha512-n8oxjeb5aIbPFEtmQxQYOLI0i9n5ySBEY/ZEHHZqKQSFnxio1rv6dthascc9dLuwrL0RC5mPCxB7vnAVGAYWAQ== +"@typescript-eslint/visitor-keys@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.12.0.tgz#c053b55a996679528beeedd8e565710ce1ae1ad3" + integrity sha512-uZk7DevrQLL3vSnfFl5bj4sL75qC9D6EdjemIdbtkuUmIheWpuiiylSY01JxJE7+zGrOWDZrp1WxOuDntvKrHQ== dependencies: - "@eslint-community/eslint-utils" "^4.2.0" - "@types/json-schema" "^7.0.9" - "@types/semver" "^7.3.12" - "@typescript-eslint/scope-manager" "5.62.0" - "@typescript-eslint/types" "5.62.0" - "@typescript-eslint/typescript-estree" "5.62.0" - eslint-scope "^5.1.1" - semver "^7.3.7" - -"@typescript-eslint/visitor-keys@5.62.0": - version "5.62.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-5.62.0.tgz#2174011917ce582875954ffe2f6912d5931e353e" - integrity sha512-07ny+LHRzQXepkGg6w0mFY41fVUNBrL2Roj/++7V1txKugfjm/Ci/qSND03r2RhlJhJYMcTn9AhhSSqQp0Ysyw== - dependencies: - "@typescript-eslint/types" "5.62.0" - eslint-visitor-keys "^3.3.0" - -"@typescript-eslint/visitor-keys@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.7.0.tgz#950148cf1ac11562a2d903fdf7acf76714a2dc9e" - integrity sha512-h0WHOj8MhdhY8YWkzIF30R379y0NqyOHExI9N9KCzvmu05EgG4FumeYa3ccfKUSphyWkWQE1ybVrgz/Pbam6YA== - dependencies: - "@typescript-eslint/types" "7.7.0" + "@typescript-eslint/types" "7.12.0" eslint-visitor-keys "^3.4.3" "@ungap/structured-clone@^1.2.0": @@ -1706,7 +1671,7 @@ 
acorn@^6.4.1: resolved "https://registry.yarnpkg.com/acorn/-/acorn-6.4.2.tgz#35866fd710528e92de10cf06016498e47e39e1e6" integrity sha512-XtGIhXwF8YM8bJhGxG5kXgjkEuNGLTkoYqVE+KMR+aspr4KGYmKYg7yUe3KghyQ9yheNwLnjmzh/7+gfDBmHCQ== -acorn@^8.0.4, acorn@^8.4.1, acorn@^8.7.1, acorn@^8.8.2, acorn@^8.9.0: +acorn@^8.0.4, acorn@^8.11.3, acorn@^8.4.1, acorn@^8.7.1, acorn@^8.8.2, acorn@^8.9.0: version "8.11.3" resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.11.3.tgz#71e0b14e13a4ec160724b38fb7b0f233b1b81d7a" integrity sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg== @@ -2562,10 +2527,10 @@ copy-props@^2.0.1: each-props "^1.3.2" is-plain-object "^5.0.0" -core-js-compat@^3.34.0: - version "3.36.1" - resolved "https://registry.yarnpkg.com/core-js-compat/-/core-js-compat-3.36.1.tgz#1818695d72c99c25d621dca94e6883e190cea3c8" - integrity sha512-Dk997v9ZCt3X/npqzyGdTlq6t7lDBhZwGvV94PKzDArjp7BTRm7WlDAXYd/OWdeFHO8OChQYRJNJvUCqCbrtKA== +core-js-compat@^3.37.0: + version "3.37.1" + resolved "https://registry.yarnpkg.com/core-js-compat/-/core-js-compat-3.37.1.tgz#c844310c7852f4bdf49b8d339730b97e17ff09ee" + integrity sha512-9TNiImhKvQqSUkOvk/mMRZzOANTiEVC7WaBNhHcKM7x+/5E1l5NvsysR19zuDQScE8k+kfQXWRN3AtS/eOSHpg== dependencies: browserslist "^4.23.0" @@ -2947,34 +2912,34 @@ esbuild-plugin-alias@0.2.1: resolved "https://registry.yarnpkg.com/esbuild-plugin-alias/-/esbuild-plugin-alias-0.2.1.tgz#45a86cb941e20e7c2bc68a2bea53562172494fcb" integrity sha512-jyfL/pwPqaFXyKnj8lP8iLk6Z0m099uXR45aSN8Av1XD4vhvQutxxPzgA2bTcAwQpa1zCXDcWOlhFgyP3GKqhQ== -esbuild@0.20.2: - version "0.20.2" - resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.20.2.tgz#9d6b2386561766ee6b5a55196c6d766d28c87ea1" - integrity sha512-WdOOppmUNU+IbZ0PaDiTst80zjnrOkyJNHoKupIcVyU8Lvla3Ugx94VzkQ32Ijqd7UhHJy75gNWDMUekcrSJ6g== +esbuild@0.21.4: + version "0.21.4" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.21.4.tgz#ceb501def8edb12a5bfd9c55f3a96db698edf022" + integrity sha512-sFMcNNrj+Q0ZDolrp5pDhH0nRPN9hLIM3fRPwgbLYJeSHHgnXSnbV3xYgSVuOeLWH9c73VwmEverVzupIv5xuA== optionalDependencies: - "@esbuild/aix-ppc64" "0.20.2" - "@esbuild/android-arm" "0.20.2" - "@esbuild/android-arm64" "0.20.2" - "@esbuild/android-x64" "0.20.2" - "@esbuild/darwin-arm64" "0.20.2" - "@esbuild/darwin-x64" "0.20.2" - "@esbuild/freebsd-arm64" "0.20.2" - "@esbuild/freebsd-x64" "0.20.2" - "@esbuild/linux-arm" "0.20.2" - "@esbuild/linux-arm64" "0.20.2" - "@esbuild/linux-ia32" "0.20.2" - "@esbuild/linux-loong64" "0.20.2" - "@esbuild/linux-mips64el" "0.20.2" - "@esbuild/linux-ppc64" "0.20.2" - "@esbuild/linux-riscv64" "0.20.2" - "@esbuild/linux-s390x" "0.20.2" - "@esbuild/linux-x64" "0.20.2" - "@esbuild/netbsd-x64" "0.20.2" - "@esbuild/openbsd-x64" "0.20.2" - "@esbuild/sunos-x64" "0.20.2" - "@esbuild/win32-arm64" "0.20.2" - "@esbuild/win32-ia32" "0.20.2" - "@esbuild/win32-x64" "0.20.2" + "@esbuild/aix-ppc64" "0.21.4" + "@esbuild/android-arm" "0.21.4" + "@esbuild/android-arm64" "0.21.4" + "@esbuild/android-x64" "0.21.4" + "@esbuild/darwin-arm64" "0.21.4" + "@esbuild/darwin-x64" "0.21.4" + "@esbuild/freebsd-arm64" "0.21.4" + "@esbuild/freebsd-x64" "0.21.4" + "@esbuild/linux-arm" "0.21.4" + "@esbuild/linux-arm64" "0.21.4" + "@esbuild/linux-ia32" "0.21.4" + "@esbuild/linux-loong64" "0.21.4" + "@esbuild/linux-mips64el" "0.21.4" + "@esbuild/linux-ppc64" "0.21.4" + "@esbuild/linux-riscv64" "0.21.4" + "@esbuild/linux-s390x" "0.21.4" + "@esbuild/linux-x64" "0.21.4" + "@esbuild/netbsd-x64" "0.21.4" + "@esbuild/openbsd-x64" "0.21.4" + 
"@esbuild/sunos-x64" "0.21.4" + "@esbuild/win32-arm64" "0.21.4" + "@esbuild/win32-ia32" "0.21.4" + "@esbuild/win32-x64" "0.21.4" esbuild@^0.19.6: version "0.19.12" @@ -3030,24 +2995,24 @@ escape-string-regexp@^4.0.0: resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34" integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA== -eslint-plugin-jest@27.9.0: - version "27.9.0" - resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-27.9.0.tgz#7c98a33605e1d8b8442ace092b60e9919730000b" - integrity sha512-QIT7FH7fNmd9n4se7FFKHbsLKGQiw885Ds6Y/sxKgCZ6natwCsXdgPOADnYVxN2QrRweF0FZWbJ6S7Rsn7llug== +eslint-plugin-jest@28.5.0: + version "28.5.0" + resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-28.5.0.tgz#b497b795de37f671eaccd38bd83030186ff5dc8d" + integrity sha512-6np6DGdmNq/eBbA7HOUNV8fkfL86PYwBfwyb8n23FXgJNTR8+ot3smRHjza9LGsBBZRypK3qyF79vMjohIL8eQ== dependencies: - "@typescript-eslint/utils" "^5.10.0" + "@typescript-eslint/utils" "^6.0.0 || ^7.0.0" -eslint-plugin-unicorn@52.0.0: - version "52.0.0" - resolved "https://registry.yarnpkg.com/eslint-plugin-unicorn/-/eslint-plugin-unicorn-52.0.0.tgz#c7a559edd52e3932cf2b3a05c3b0efc604c1eeb8" - integrity sha512-1Yzm7/m+0R4djH0tjDjfVei/ju2w3AzUGjG6q8JnuNIL5xIwsflyCooW5sfBvQp2pMYQFSWWCFONsjCax1EHng== +eslint-plugin-unicorn@53.0.0: + version "53.0.0" + resolved "https://registry.yarnpkg.com/eslint-plugin-unicorn/-/eslint-plugin-unicorn-53.0.0.tgz#df3a5c9ecabeb759e6fd867b2d84198466ac8c4d" + integrity sha512-kuTcNo9IwwUCfyHGwQFOK/HjJAYzbODHN3wP0PgqbW+jbXqpNWxNVpVhj2tO9SixBwuAdmal8rVcWKBxwFnGuw== dependencies: - "@babel/helper-validator-identifier" "^7.22.20" + "@babel/helper-validator-identifier" "^7.24.5" "@eslint-community/eslint-utils" "^4.4.0" - "@eslint/eslintrc" "^2.1.4" + "@eslint/eslintrc" "^3.0.2" ci-info "^4.0.0" clean-regexp "^1.0.0" - core-js-compat "^3.34.0" + core-js-compat "^3.37.0" esquery "^1.5.0" indent-string "^4.0.0" is-builtin-module "^3.2.1" @@ -3056,10 +3021,10 @@ eslint-plugin-unicorn@52.0.0: read-pkg-up "^7.0.1" regexp-tree "^0.1.27" regjsparser "^0.10.0" - semver "^7.5.4" + semver "^7.6.1" strip-indent "^3.0.0" -eslint-scope@5.1.1, eslint-scope@^5.1.1: +eslint-scope@5.1.1: version "5.1.1" resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c" integrity sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw== @@ -3080,6 +3045,11 @@ eslint-visitor-keys@^3.3.0, eslint-visitor-keys@^3.4.1, eslint-visitor-keys@^3.4 resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz#0cd72fe8550e3c2eae156a96a4dddcd1c8ac5800" integrity sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag== +eslint-visitor-keys@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-4.0.0.tgz#e3adc021aa038a2a8e0b2f8b0ce8f66b9483b1fb" + integrity sha512-OtIRv/2GyiF6o/d8K7MYKKbXrOUBIK6SfkIRM4Z0dY3w+LiQ0vy3F57m0Z71bjbyeiWFiHJ8brqnmE6H6/jEuw== + eslint@8.57.0: version "8.57.0" resolved "https://registry.yarnpkg.com/eslint/-/eslint-8.57.0.tgz#c786a6fd0e0b68941aaf624596fb987089195668" @@ -3138,6 +3108,15 @@ esniff@^2.0.1: event-emitter "^0.3.5" type "^2.7.2" +espree@^10.0.1: + version "10.0.1" + resolved 
"https://registry.yarnpkg.com/espree/-/espree-10.0.1.tgz#600e60404157412751ba4a6f3a2ee1a42433139f" + integrity sha512-MWkrWZbJsL2UwnjxTX3gG8FneachS/Mwg7tdGXce011sJd5b0JG54vat5KHnfSBODZ3Wvzd2WnjxyzsRoVv+ww== + dependencies: + acorn "^8.11.3" + acorn-jsx "^5.3.2" + eslint-visitor-keys "^4.0.0" + espree@^9.6.0, espree@^9.6.1: version "9.6.1" resolved "https://registry.yarnpkg.com/espree/-/espree-9.6.1.tgz#a2a17b8e434690a5432f2f8018ce71d331a48c6f" @@ -3650,16 +3629,16 @@ glob-watcher@^5.0.3: normalize-path "^3.0.0" object.defaults "^1.1.0" -glob@10.3.12: - version "10.3.12" - resolved "https://registry.yarnpkg.com/glob/-/glob-10.3.12.tgz#3a65c363c2e9998d220338e88a5f6ac97302960b" - integrity sha512-TCNv8vJ+xz4QiqTpfOJA7HvYv+tNIRHKfUWw/q+v2jdgN4ebz+KY9tGx5J4rHP0o84mNP+ApH66HRX8us3Khqg== +glob@10.4.1: + version "10.4.1" + resolved "https://registry.yarnpkg.com/glob/-/glob-10.4.1.tgz#0cfb01ab6a6b438177bfe6a58e2576f6efe909c2" + integrity sha512-2jelhlq3E4ho74ZyVLN03oKdAZVUa6UDZzFLVH1H7dnoax+y9qyaq8zBkfDIggjniU19z0wU18y16jMB2eyVIw== dependencies: foreground-child "^3.1.0" - jackspeak "^2.3.6" - minimatch "^9.0.1" - minipass "^7.0.4" - path-scurry "^1.10.2" + jackspeak "^3.1.2" + minimatch "^9.0.4" + minipass "^7.1.2" + path-scurry "^1.11.1" glob@^7.1.1, glob@^7.1.3, glob@^7.1.4: version "7.2.3" @@ -3705,6 +3684,11 @@ globals@^13.19.0: dependencies: type-fest "^0.20.2" +globals@^14.0.0: + version "14.0.0" + resolved "https://registry.yarnpkg.com/globals/-/globals-14.0.0.tgz#898d7413c29babcf6bafe56fcadded858ada724e" + integrity sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ== + globby@^11.1.0: version "11.1.0" resolved "https://registry.yarnpkg.com/globby/-/globby-11.1.0.tgz#bd4be98bb042f83d796f7e3811991fbe82a0d34b" @@ -4018,6 +4002,11 @@ human-signals@^2.1.0: resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0" integrity sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw== +hyperdyperid@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/hyperdyperid/-/hyperdyperid-1.2.0.tgz#59668d323ada92228d2a869d3e474d5a33b69e6b" + integrity sha512-Y93lCzHYgGWdrJ66yIktxiaGULYc6oGiABxhcO5AufBeOyoIdZF7bIfLaOrbM0iGIOXQQgxxRrFEnb+Y6w1n4A== + ignore@^5.2.0, ignore@^5.2.4, ignore@^5.3.1: version "5.3.1" resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.3.1.tgz#5073e554cd42c5b33b394375f538b8593e34d4ef" @@ -4387,18 +4376,18 @@ istextorbinary@^3.0.0: binaryextensions "^2.2.0" textextensions "^3.2.0" -ix@5.0.0: - version "5.0.0" - resolved "https://registry.yarnpkg.com/ix/-/ix-5.0.0.tgz#b9e292f79b1876bbf696809fe86e42930bdbfcd4" - integrity sha512-6LyyrHnvNrSy5pKtW/KA+KKusHrB223aBJCJlIGPN7QBfDkEEtNrAkAz9lLLShIcdJntq6BiPCHuKaCM/9wwXw== +ix@6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/ix/-/ix-6.0.0.tgz#c1875523f8090c7146dc3ac3412a763663887f27" + integrity sha512-B/KeYkHtOWbr3ttckqWT9uha2ixw9fGVDxX+DwVXhO+P5eOhyCadt+aC30hRBvG+do+tbI3xbYDMYN6dp1C4Vw== dependencies: - "@types/node" "^13.7.4" - tslib "^2.3.0" + "@types/node" ">=13.7.4" + tslib "^2.6.2" -jackspeak@^2.3.6: - version "2.3.6" - resolved "https://registry.yarnpkg.com/jackspeak/-/jackspeak-2.3.6.tgz#647ecc472238aee4b06ac0e461acc21a8c505ca8" - integrity sha512-N3yCS/NegsOBokc8GAdM8UcmfsKiSS8cipheD/nivzr700H+nsMOxJjQnvwOcRYVuFkdH0wGUvW2WbXGmrZGbQ== +jackspeak@^3.1.2: + version "3.1.2" + resolved 
"https://registry.yarnpkg.com/jackspeak/-/jackspeak-3.1.2.tgz#eada67ea949c6b71de50f1b09c92a961897b90ab" + integrity sha512-kWmLKn2tRtfYMF/BakihVVRzBKOxz4gJMiL2Rj91WnAB5TPZumSH99R/Yf1qE1u4uRimvCSJfm6hnxohXeEXjQ== dependencies: "@isaacs/cliui" "^8.0.2" optionalDependencies: @@ -4678,10 +4667,10 @@ jest-runtime@^29.7.0: slash "^3.0.0" strip-bom "^4.0.0" -jest-silent-reporter@0.5.0: - version "0.5.0" - resolved "https://registry.yarnpkg.com/jest-silent-reporter/-/jest-silent-reporter-0.5.0.tgz#5fd8ccd61665227e3bf19d908b7350719d06ff38" - integrity sha512-epdLt8Oj0a1AyRiR6F8zx/1SVT1Mi7VU3y4wB2uOBHs/ohIquC7v2eeja7UN54uRPyHInIKWdL+RdG228n5pJQ== +jest-silent-reporter@0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/jest-silent-reporter/-/jest-silent-reporter-0.6.0.tgz#e9c63a3b1e3c80571d690d998faf842f576b6a60" + integrity sha512-4nmS+5o7ycVlvbQOTx7CnGdbBtP2646hnDgQpQLaVhjHcQNHD+gqBAetyjRDlgpZ8+8N82MWI59K+EX2LsVk7g== dependencies: chalk "^4.0.0" jest-util "^26.0.0" @@ -5165,11 +5154,14 @@ matchdep@^2.0.0: resolve "^1.4.0" stack-trace "0.0.10" -memfs@4.8.2: - version "4.8.2" - resolved "https://registry.yarnpkg.com/memfs/-/memfs-4.8.2.tgz#9bb7c3e43647348451082557f05fb170b7442949" - integrity sha512-j4WKth315edViMBGkHW6NTF0QBjsTrcRDmYNcGsPq+ozMEyCCCIlX2d2mJ5wuh6iHvJ3FevUrr48v58YRqVdYg== +memfs@4.9.2: + version "4.9.2" + resolved "https://registry.yarnpkg.com/memfs/-/memfs-4.9.2.tgz#42e7b48207268dad8c9c48ea5d4952c5d3840433" + integrity sha512-f16coDZlTG1jskq3mxarwB+fGRrd0uXWt+o1WIhRfOwbXQZqUDsTVxQBFK9JjRQHblg8eAG2JSbprDXKjc7ijQ== dependencies: + "@jsonjoy.com/json-pack" "^1.0.3" + "@jsonjoy.com/util" "^1.1.2" + sonic-forest "^1.0.0" tslib "^2.0.0" memoizee@0.4.X: @@ -5278,7 +5270,7 @@ minimatch@^3.0.4, minimatch@^3.0.5, minimatch@^3.1.1, minimatch@^3.1.2: dependencies: brace-expansion "^1.1.7" -minimatch@^9.0.1, minimatch@^9.0.3, minimatch@^9.0.4: +minimatch@^9.0.3, minimatch@^9.0.4: version "9.0.4" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.4.tgz#8e49c731d1749cbec05050ee5145147b32496a51" integrity sha512-KqWh+VchfxcMNRAJjj2tnsSJdNbHsVgnkBhTNrW7AjVo6OvLtxw8zfT9oLw1JSohlFzJ8jCoTgaoXvJ+kHt6fw== @@ -5299,11 +5291,16 @@ minimist@1.x: resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.8.tgz#c1a464e7693302e082a075cee0c057741ac4772c" integrity sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA== -"minipass@^5.0.0 || ^6.0.2 || ^7.0.0", minipass@^7.0.4: +"minipass@^5.0.0 || ^6.0.2 || ^7.0.0": version "7.0.4" resolved "https://registry.yarnpkg.com/minipass/-/minipass-7.0.4.tgz#dbce03740f50a4786ba994c1fb908844d27b038c" integrity sha512-jYofLM5Dam9279rdkWzqHozUo4ybjdZmCsDHePy5V/PbBcVMiSZR97gmAy45aqi8CK1lG2ECd356FU86avfwUQ== +minipass@^7.1.2: + version "7.1.2" + resolved "https://registry.yarnpkg.com/minipass/-/minipass-7.1.2.tgz#93a9626ce5e5e66bd4db86849e7515e92340a707" + integrity sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw== + mixin-deep@^1.2.0: version "1.3.2" resolved "https://registry.yarnpkg.com/mixin-deep/-/mixin-deep-1.3.2.tgz#1120b43dc359a785dce65b55b82e257ccf479566" @@ -5694,10 +5691,10 @@ path-root@^0.1.1: dependencies: path-root-regex "^0.1.0" -path-scurry@^1.10.2: - version "1.10.2" - resolved "https://registry.yarnpkg.com/path-scurry/-/path-scurry-1.10.2.tgz#8f6357eb1239d5fa1da8b9f70e9c080675458ba7" - integrity sha512-7xTavNy5RQXnsjANvVvMkEjvloOinkAjv/Z6Ildz9v2RinZ4SBKTWFOVRbaF8p0vpHnyjV/UwNDdKuUv6M5qcA== +path-scurry@^1.11.1: + version "1.11.1" + resolved 
"https://registry.yarnpkg.com/path-scurry/-/path-scurry-1.11.1.tgz#7960a668888594a0720b12a911d1a742ab9f11d2" + integrity sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA== dependencies: lru-cache "^10.2.0" minipass "^5.0.0 || ^6.0.2 || ^7.0.0" @@ -6163,29 +6160,29 @@ rimraf@^3.0.2: dependencies: glob "^7.1.3" -rollup@4.14.3: - version "4.14.3" - resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.14.3.tgz#bcbb7784b35826d3164346fa6d5aac95190d8ba9" - integrity sha512-ag5tTQKYsj1bhrFC9+OEWqb5O6VYgtQDO9hPDBMmIbePwhfSr+ExlcU741t8Dhw5DkPCQf6noz0jb36D6W9/hw== +rollup@4.18.0: + version "4.18.0" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.18.0.tgz#497f60f0c5308e4602cf41136339fbf87d5f5dda" + integrity sha512-QmJz14PX3rzbJCN1SG4Xe/bAAX2a6NpCP8ab2vfu2GiUr8AQcr2nCV/oEO3yneFarB67zk8ShlIyWb2LGTb3Sg== dependencies: "@types/estree" "1.0.5" optionalDependencies: - "@rollup/rollup-android-arm-eabi" "4.14.3" - "@rollup/rollup-android-arm64" "4.14.3" - "@rollup/rollup-darwin-arm64" "4.14.3" - "@rollup/rollup-darwin-x64" "4.14.3" - "@rollup/rollup-linux-arm-gnueabihf" "4.14.3" - "@rollup/rollup-linux-arm-musleabihf" "4.14.3" - "@rollup/rollup-linux-arm64-gnu" "4.14.3" - "@rollup/rollup-linux-arm64-musl" "4.14.3" - "@rollup/rollup-linux-powerpc64le-gnu" "4.14.3" - "@rollup/rollup-linux-riscv64-gnu" "4.14.3" - "@rollup/rollup-linux-s390x-gnu" "4.14.3" - "@rollup/rollup-linux-x64-gnu" "4.14.3" - "@rollup/rollup-linux-x64-musl" "4.14.3" - "@rollup/rollup-win32-arm64-msvc" "4.14.3" - "@rollup/rollup-win32-ia32-msvc" "4.14.3" - "@rollup/rollup-win32-x64-msvc" "4.14.3" + "@rollup/rollup-android-arm-eabi" "4.18.0" + "@rollup/rollup-android-arm64" "4.18.0" + "@rollup/rollup-darwin-arm64" "4.18.0" + "@rollup/rollup-darwin-x64" "4.18.0" + "@rollup/rollup-linux-arm-gnueabihf" "4.18.0" + "@rollup/rollup-linux-arm-musleabihf" "4.18.0" + "@rollup/rollup-linux-arm64-gnu" "4.18.0" + "@rollup/rollup-linux-arm64-musl" "4.18.0" + "@rollup/rollup-linux-powerpc64le-gnu" "4.18.0" + "@rollup/rollup-linux-riscv64-gnu" "4.18.0" + "@rollup/rollup-linux-s390x-gnu" "4.18.0" + "@rollup/rollup-linux-x64-gnu" "4.18.0" + "@rollup/rollup-linux-x64-musl" "4.18.0" + "@rollup/rollup-win32-arm64-msvc" "4.18.0" + "@rollup/rollup-win32-ia32-msvc" "4.18.0" + "@rollup/rollup-win32-x64-msvc" "4.18.0" fsevents "~2.3.2" run-parallel@^1.1.9: @@ -6250,12 +6247,10 @@ semver@^6.3.0, semver@^6.3.1: resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4" integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA== -semver@^7.3.4, semver@^7.3.7, semver@^7.5.3, semver@^7.5.4, semver@^7.6.0: - version "7.6.0" - resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.0.tgz#1a46a4db4bffcccd97b743b5005c8325f23d4e2d" - integrity sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg== - dependencies: - lru-cache "^6.0.0" +semver@^7.3.4, semver@^7.5.3, semver@^7.5.4, semver@^7.6.0, semver@^7.6.1: + version "7.6.2" + resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.2.tgz#1e3b34759f896e8f14d6134732ce798aeb0c6e13" + integrity sha512-FNAIBWCx9qcRhoHcgcJ0gvU7SN1lYU2ZXuSfl04bSC5OpvDHFyJCjdNHomPXxjQlCBU67YW64PzY7/VIEH7F2w== serialize-javascript@^6.0.1: version "6.0.2" @@ -6386,6 +6381,13 @@ snapdragon@^0.8.1: source-map-resolve "^0.5.0" use "^3.1.0" +sonic-forest@^1.0.0: + version "1.0.2" + resolved 
"https://registry.yarnpkg.com/sonic-forest/-/sonic-forest-1.0.2.tgz#d80aa621d1cffe75a606ca44789ccff30f5b9ce6" + integrity sha512-2rICdwIJi5kVlehMUVtJeHn3ohh5YZV4pDv0P0c1M11cRz/gXNViItpM94HQwfvnXuzybpqK0LZJgTa3lEwtAw== + dependencies: + tree-dump "^1.0.0" + source-map-resolve@^0.5.0: version "0.5.3" resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a" @@ -6755,6 +6757,11 @@ textextensions@^3.2.0: resolved "https://registry.yarnpkg.com/textextensions/-/textextensions-3.3.0.tgz#03530d5287b86773c08b77458589148870cc71d3" integrity sha512-mk82dS8eRABNbeVJrEiN5/UMSCliINAuz8mkUwH4SwslkNP//gbEzlWNS5au0z5Dpx40SQxzqZevZkn+WYJ9Dw== +thingies@^1.20.0: + version "1.21.0" + resolved "https://registry.yarnpkg.com/thingies/-/thingies-1.21.0.tgz#e80fbe58fd6fdaaab8fad9b67bd0a5c943c445c1" + integrity sha512-hsqsJsFMsV+aD4s3CWKk85ep/3I9XzYV/IXaSouJMYIoDlgyi11cBhsqYe9/geRfB0YIikBQg6raRaM+nIMP9g== + through2-filter@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/through2-filter/-/through2-filter-3.0.0.tgz#700e786df2367c2c88cd8aa5be4cf9c1e7831254" @@ -6866,6 +6873,11 @@ totalist@^3.0.0: resolved "https://registry.yarnpkg.com/totalist/-/totalist-3.0.1.tgz#ba3a3d600c915b1a97872348f79c127475f6acf8" integrity sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ== +tree-dump@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/tree-dump/-/tree-dump-1.0.1.tgz#b448758da7495580e6b7830d6b7834fca4c45b96" + integrity sha512-WCkcRBVPSlHHq1dc/px9iOfqklvzCbdRwvlNfxGZsrHqf6aZttfPrd7DJTt6oR10dwUfpFFQeVTkPbBIZxX/YA== + trim-newlines@^4.0.2: version "4.1.1" resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-4.1.1.tgz#28c88deb50ed10c7ba6dc2474421904a00139125" @@ -6876,10 +6888,10 @@ ts-api-utils@^1.3.0: resolved "https://registry.yarnpkg.com/ts-api-utils/-/ts-api-utils-1.3.0.tgz#4b490e27129f1e8e686b45cc4ab63714dc60eea1" integrity sha512-UQMIo7pb8WRomKR1/+MFVLTroIvDVtMX3K6OUir8ynLyzB8Jeriont2bTAtmNPa1ekAgN7YPDyf6V+ygrdU+eQ== -ts-jest@29.1.2: - version "29.1.2" - resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-29.1.2.tgz#7613d8c81c43c8cb312c6904027257e814c40e09" - integrity sha512-br6GJoH/WUX4pu7FbZXuWGKGNDuU7b8Uj77g/Sp7puZV6EXzuByl6JrECvm0MzVzSTkSHWTihsXt+5XYER5b+g== +ts-jest@29.1.4: + version "29.1.4" + resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-29.1.4.tgz#26f8a55ce31e4d2ef7a1fd47dc7fa127e92793ef" + integrity sha512-YiHwDhSvCiItoAgsKtoLFCuakDzDsJ1DLDnSouTaTmdOcOwIkSzbLXduaQ6M5DRVhuZC/NYaaZ/mtHbWMv/S6Q== dependencies: bs-logger "0.x" fast-json-stable-stringify "2.x" @@ -6909,23 +6921,11 @@ ts-node@10.9.2: v8-compile-cache-lib "^3.0.1" yn "3.1.1" -tslib@^1.8.1: - version "1.14.1" - resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00" - integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg== - -tslib@^2.0.0, tslib@^2.1.0, tslib@^2.3.0, tslib@^2.4.0, tslib@^2.6.2: +tslib@^2.0.0, tslib@^2.1.0, tslib@^2.4.0, tslib@^2.6.2: version "2.6.2" resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.2.tgz#703ac29425e7b37cd6fd456e92404d46d1f3e4ae" integrity sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q== -tsutils@^3.21.0: - version "3.21.0" - resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623" - integrity 
sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==
-  dependencies:
-    tslib "^1.8.1"
-
type-check@^0.4.0, type-check@~0.4.0:
  version "0.4.0"
  resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1"
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
index b8f85b08632a3..1eb6de74fec65 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "arrow/c/bridge.h"
 #include "arrow/util/utf8.h"
 
 #include "arrow/matlab/array/proxy/array.h"
@@ -40,6 +41,7 @@ Array::Array(std::shared_ptr<arrow::Array> array) : array{std::move(array)} {
   REGISTER_METHOD(Array, getType);
   REGISTER_METHOD(Array, isEqual);
   REGISTER_METHOD(Array, slice);
+  REGISTER_METHOD(Array, exportToC);
 }
 
 std::shared_ptr<arrow::Array> Array::unwrap() { return array; }
@@ -178,4 +180,20 @@ void Array::slice(libmexclass::proxy::method::Context& context) {
   output[0]["TypeID"] = factory.createScalar(type_id);
   context.outputs[0] = output;
 }
+
+void Array::exportToC(libmexclass::proxy::method::Context& context) {
+  namespace mda = ::matlab::data;
+  mda::StructArray opts = context.inputs[0];
+  const mda::TypedArray<uint64_t> array_address_mda = opts[0]["ArrowArrayAddress"];
+  const mda::TypedArray<uint64_t> schema_address_mda = opts[0]["ArrowSchemaAddress"];
+
+  auto arrow_array = reinterpret_cast<struct ArrowArray*>(uint64_t(array_address_mda[0]));
+  auto arrow_schema =
+      reinterpret_cast<struct ArrowSchema*>(uint64_t(schema_address_mda[0]));
+
+  MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(
+      arrow::ExportArray(*array, arrow_array, arrow_schema), context,
+      error::C_EXPORT_FAILED);
+}
+
 }  // namespace arrow::matlab::array::proxy
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
index 61ba06a503bc4..c249693ac2797 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
@@ -45,6 +45,8 @@ class Array : public libmexclass::proxy::Proxy {
 
   void slice(libmexclass::proxy::method::Context& context);
 
+  void exportToC(libmexclass::proxy::method::Context& context);
+
   std::shared_ptr<arrow::Array> array;
 };
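The exportToC proxy method above is the C++ half of the export path; on the MATLAB side it is reached through the export method added to arrow.array.Array later in this diff. A minimal round-trip sketch (the arrow.array gateway function is assumed to exist outside this diff; variable names are illustrative):

    % Allocate empty C Data Interface structs (wrapper classes added in this change).
    cArray = arrow.c.Array();
    cSchema = arrow.c.Schema();
    % Export an arrow.array.Array into the structs, then import it back.
    original = arrow.array([1 2 3]);
    original.export(cArray.Address, cSchema.Address);
    roundTripped = arrow.array.Array.import(cArray, cSchema);
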
diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array.cc b/matlab/src/cpp/arrow/matlab/c/proxy/array.cc
new file mode 100644
index 0000000000000..a5f3418f1bcfa
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/c/proxy/array.cc
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include "arrow/c/abi.h"
+
+#include "arrow/matlab/c/proxy/array.h"
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::c::proxy {
+
+Array::Array() : arrowArray{} { REGISTER_METHOD(Array, getAddress); }
+
+Array::~Array() {
+  if (arrowArray.release != NULL) {
+    arrowArray.release(&arrowArray);
+    arrowArray.release = NULL;
+  }
+}
+
+libmexclass::proxy::MakeResult Array::make(
+    const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+  return std::make_shared<Array>();
+}
+
+void Array::getAddress(libmexclass::proxy::method::Context& context) {
+  namespace mda = ::matlab::data;
+
+  mda::ArrayFactory factory;
+  auto address = reinterpret_cast<uint64_t>(&arrowArray);
+  context.outputs[0] = factory.createScalar(address);
+}
+
+}  // namespace arrow::matlab::c::proxy
\ No newline at end of file
diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array.h b/matlab/src/cpp/arrow/matlab/c/proxy/array.h
new file mode 100644
index 0000000000000..bb35807fcd015
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/c/proxy/array.h
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/c/abi.h"
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::c::proxy {
+
+class Array : public libmexclass::proxy::Proxy {
+ public:
+  Array();
+
+  ~Array();
+
+  static libmexclass::proxy::MakeResult make(
+      const libmexclass::proxy::FunctionArguments& constructor_arguments);
+
+ protected:
+  void getAddress(libmexclass::proxy::method::Context& context);
+
+  struct ArrowArray arrowArray;
+};
+
+}  // namespace arrow::matlab::c::proxy
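Lifetime note: per the C Data Interface contract, whoever holds a populated ArrowArray must eventually invoke its release callback, and the destructor above does exactly that. A sketch of the consequence on the MATLAB side, assuming the wrapper holds the last reference to its proxy:

    cArray = arrow.c.Array();   % owns an empty ArrowArray struct
    % ... export into cArray.Address here, but never import it ...
    clear cArray                % destroying the wrapper runs release() if still set
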
+ +#include "arrow/array.h" +#include "arrow/c/bridge.h" + +#include "arrow/matlab/array/proxy/wrap.h" +#include "arrow/matlab/c/proxy/array_importer.h" +#include "arrow/matlab/error/error.h" + +#include "libmexclass/proxy/ProxyManager.h" + +namespace arrow::matlab::c::proxy { + +ArrayImporter::ArrayImporter() { REGISTER_METHOD(ArrayImporter, import); } + +libmexclass::proxy::MakeResult ArrayImporter::make( + const libmexclass::proxy::FunctionArguments& constructor_arguments) { + return std::make_shared(); +} + +void ArrayImporter::import(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + using namespace libmexclass::proxy; + + mda::StructArray args = context.inputs[0]; + const mda::TypedArray arrow_array_address_mda = args[0]["ArrowArrayAddress"]; + const mda::TypedArray arrow_schema_address_mda = + args[0]["ArrowSchemaAddress"]; + + const auto arrow_array_address = uint64_t(arrow_array_address_mda[0]); + const auto arrow_schema_address = uint64_t(arrow_schema_address_mda[0]); + + auto arrow_array = reinterpret_cast(arrow_array_address); + auto arrow_schema = reinterpret_cast(arrow_schema_address); + + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto array, + arrow::ImportArray(arrow_array, arrow_schema), + context, error::C_IMPORT_FAILED); + + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto array_proxy, + arrow::matlab::array::proxy::wrap(array), context, + error::UNKNOWN_PROXY_FOR_ARRAY_TYPE); + + mda::ArrayFactory factory; + const auto array_proxy_id = ProxyManager::manageProxy(array_proxy); + const auto array_proxy_id_mda = factory.createScalar(array_proxy_id); + const auto array_type_id_mda = + factory.createScalar(static_cast(array->type_id())); + + context.outputs[0] = array_proxy_id_mda; + context.outputs[1] = array_type_id_mda; +} + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h new file mode 100644 index 0000000000000..6459393058737 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h
new file mode 100644
index 0000000000000..6459393058737
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::c::proxy {
+
+class ArrayImporter : public libmexclass::proxy::Proxy {
+ public:
+  ArrayImporter();
+
+  ~ArrayImporter() = default;
+
+  static libmexclass::proxy::MakeResult make(
+      const libmexclass::proxy::FunctionArguments& constructor_arguments);
+
+ protected:
+  void import(libmexclass::proxy::method::Context& context);
+};
+
+}  // namespace arrow::matlab::c::proxy
diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc b/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc
new file mode 100644
index 0000000000000..ed9ba14cfbe01
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/c/bridge.h"
+
+#include "arrow/matlab/c/proxy/record_batch_importer.h"
+#include "arrow/matlab/error/error.h"
+#include "arrow/matlab/tabular/proxy/record_batch.h"
+
+#include "libmexclass/proxy/ProxyManager.h"
+
+namespace arrow::matlab::c::proxy {
+
+RecordBatchImporter::RecordBatchImporter() {
+  REGISTER_METHOD(RecordBatchImporter, import);
+}
+
+libmexclass::proxy::MakeResult RecordBatchImporter::make(
+    const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+  return std::make_shared<RecordBatchImporter>();
+}
+
+void RecordBatchImporter::import(libmexclass::proxy::method::Context& context) {
+  namespace mda = ::matlab::data;
+  using namespace libmexclass::proxy;
+  using RecordBatchProxy = arrow::matlab::tabular::proxy::RecordBatch;
+
+  mda::StructArray args = context.inputs[0];
+  const mda::TypedArray<uint64_t> arrow_array_address_mda = args[0]["ArrowArrayAddress"];
+  const mda::TypedArray<uint64_t> arrow_schema_address_mda =
+      args[0]["ArrowSchemaAddress"];
+
+  const auto arrow_array_address = uint64_t(arrow_array_address_mda[0]);
+  const auto arrow_schema_address = uint64_t(arrow_schema_address_mda[0]);
+
+  auto arrow_array = reinterpret_cast<struct ArrowArray*>(arrow_array_address);
+  auto arrow_schema = reinterpret_cast<struct ArrowSchema*>(arrow_schema_address);
+
+  MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto record_batch,
+                                      arrow::ImportRecordBatch(arrow_array, arrow_schema),
+                                      context, error::C_IMPORT_FAILED);
+
+  auto record_batch_proxy = std::make_shared<RecordBatchProxy>(std::move(record_batch));
+
+  mda::ArrayFactory factory;
+  const auto record_batch_proxy_id = ProxyManager::manageProxy(record_batch_proxy);
+  const auto record_batch_proxy_id_mda = factory.createScalar(record_batch_proxy_id);
+
+  context.outputs[0] = record_batch_proxy_id_mda;
+}
+
+}  // namespace arrow::matlab::c::proxy
diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.h b/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.h
new file mode 100644
index 0000000000000..0f697db0d25b0
--- /dev/null
+++ 
b/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +class RecordBatchImporter : public libmexclass::proxy::Proxy { + public: + RecordBatchImporter(); + + ~RecordBatchImporter() = default; + + static libmexclass::proxy::MakeResult make( + const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void import(libmexclass::proxy::method::Context& context); +}; + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/schema.cc b/matlab/src/cpp/arrow/matlab/c/proxy/schema.cc new file mode 100644 index 0000000000000..7f239f5628720 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/schema.cc @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#include <cstdint>
+#include "arrow/c/abi.h"
+
+#include "arrow/matlab/c/proxy/schema.h"
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::c::proxy {
+
+Schema::Schema() : arrowSchema{} { REGISTER_METHOD(Schema, getAddress); }
+
+Schema::~Schema() {
+  if (arrowSchema.release != NULL) {
+    arrowSchema.release(&arrowSchema);
+    arrowSchema.release = NULL;
+  }
+}
+
+libmexclass::proxy::MakeResult Schema::make(
+    const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+  return std::make_shared<Schema>();
+}
+
+void Schema::getAddress(libmexclass::proxy::method::Context& context) {
+  namespace mda = ::matlab::data;
+
+  mda::ArrayFactory factory;
+  auto address = reinterpret_cast<uint64_t>(&arrowSchema);
+  context.outputs[0] = factory.createScalar(address);
+}
+
+}  // namespace arrow::matlab::c::proxy
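Both struct wrappers follow the same pattern: the C++ proxy owns the struct for its whole lifetime and only ever hands MATLAB the struct's address as a uint64, which MATLAB passes back across the MEX boundary. Sketch:

    cSchema = arrow.c.Schema();
    address = cSchema.Address;   % uint64 address of the underlying ArrowSchema struct
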
+ +#include "arrow/c/abi.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +class Schema : public libmexclass::proxy::Proxy { + public: + Schema(); + + ~Schema(); + + static libmexclass::proxy::MakeResult make( + const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void getAddress(libmexclass::proxy::method::Context& context); + + struct ArrowSchema arrowSchema; +}; + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h index db8b715141ee8..58c43d8843e4b 100644 --- a/matlab/src/cpp/arrow/matlab/error/error.h +++ b/matlab/src/cpp/arrow/matlab/error/error.h @@ -240,5 +240,7 @@ static const char* ARRAY_SLICE_NON_POSITIVE_OFFSET = static const char* ARRAY_SLICE_NEGATIVE_LENGTH = "arrow:array:slice:NegativeLength"; static const char* ARRAY_SLICE_FAILED_TO_CREATE_ARRAY_PROXY = "arrow:array:slice:FailedToCreateArrayProxy"; +static const char* C_EXPORT_FAILED = "arrow:c:export:ExportFailed"; +static const char* C_IMPORT_FAILED = "arrow:c:import:ImportFailed"; } // namespace arrow::matlab::error diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 23492f75deacc..53a19da82e334 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -25,6 +25,10 @@ #include "arrow/matlab/array/proxy/time64_array.h" #include "arrow/matlab/array/proxy/timestamp_array.h" #include "arrow/matlab/buffer/proxy/buffer.h" +#include "arrow/matlab/c/proxy/array.h" +#include "arrow/matlab/c/proxy/array_importer.h" +#include "arrow/matlab/c/proxy/record_batch_importer.h" +#include "arrow/matlab/c/proxy/schema.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/io/csv/proxy/table_reader.h" #include "arrow/matlab/io/csv/proxy/table_writer.h" @@ -51,54 +55,58 @@ namespace arrow::matlab::proxy { libmexclass::proxy::MakeResult Factory::make_proxy( const ClassName& class_name, const FunctionArguments& constructor_arguments) { // clang-format off - REGISTER_PROXY(arrow.array.proxy.Float32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Float64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt8Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt16Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int8Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int16Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.BooleanArray , arrow::matlab::array::proxy::BooleanArray); - REGISTER_PROXY(arrow.array.proxy.StringArray , arrow::matlab::array::proxy::StringArray); - REGISTER_PROXY(arrow.array.proxy.StructArray , arrow::matlab::array::proxy::StructArray); - REGISTER_PROXY(arrow.array.proxy.ListArray , arrow::matlab::array::proxy::ListArray); - REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Time32Array , 
diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
index 23492f75deacc..53a19da82e334 100644
--- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc
+++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
@@ -25,6 +25,10 @@
 #include "arrow/matlab/array/proxy/time64_array.h"
 #include "arrow/matlab/array/proxy/timestamp_array.h"
 #include "arrow/matlab/buffer/proxy/buffer.h"
+#include "arrow/matlab/c/proxy/array.h"
+#include "arrow/matlab/c/proxy/array_importer.h"
+#include "arrow/matlab/c/proxy/record_batch_importer.h"
+#include "arrow/matlab/c/proxy/schema.h"
 #include "arrow/matlab/error/error.h"
 #include "arrow/matlab/io/csv/proxy/table_reader.h"
 #include "arrow/matlab/io/csv/proxy/table_writer.h"
@@ -51,54 +55,58 @@ namespace arrow::matlab::proxy {
 
 libmexclass::proxy::MakeResult Factory::make_proxy(
     const ClassName& class_name, const FunctionArguments& constructor_arguments) {
   // clang-format off
-  REGISTER_PROXY(arrow.array.proxy.Float32Array    , arrow::matlab::array::proxy::NumericArray<arrow::FloatType>);
-  REGISTER_PROXY(arrow.array.proxy.Float64Array    , arrow::matlab::array::proxy::NumericArray<arrow::DoubleType>);
-  REGISTER_PROXY(arrow.array.proxy.UInt8Array      , arrow::matlab::array::proxy::NumericArray<arrow::UInt8Type>);
-  REGISTER_PROXY(arrow.array.proxy.UInt16Array     , arrow::matlab::array::proxy::NumericArray<arrow::UInt16Type>);
-  REGISTER_PROXY(arrow.array.proxy.UInt32Array     , arrow::matlab::array::proxy::NumericArray<arrow::UInt32Type>);
-  REGISTER_PROXY(arrow.array.proxy.UInt64Array     , arrow::matlab::array::proxy::NumericArray<arrow::UInt64Type>);
-  REGISTER_PROXY(arrow.array.proxy.Int8Array       , arrow::matlab::array::proxy::NumericArray<arrow::Int8Type>);
-  REGISTER_PROXY(arrow.array.proxy.Int16Array      , arrow::matlab::array::proxy::NumericArray<arrow::Int16Type>);
-  REGISTER_PROXY(arrow.array.proxy.Int32Array      , arrow::matlab::array::proxy::NumericArray<arrow::Int32Type>);
-  REGISTER_PROXY(arrow.array.proxy.Int64Array      , arrow::matlab::array::proxy::NumericArray<arrow::Int64Type>);
-  REGISTER_PROXY(arrow.array.proxy.BooleanArray    , arrow::matlab::array::proxy::BooleanArray);
-  REGISTER_PROXY(arrow.array.proxy.StringArray     , arrow::matlab::array::proxy::StringArray);
-  REGISTER_PROXY(arrow.array.proxy.StructArray     , arrow::matlab::array::proxy::StructArray);
-  REGISTER_PROXY(arrow.array.proxy.ListArray       , arrow::matlab::array::proxy::ListArray);
-  REGISTER_PROXY(arrow.array.proxy.TimestampArray  , arrow::matlab::array::proxy::NumericArray<arrow::TimestampType>);
-  REGISTER_PROXY(arrow.array.proxy.Time32Array     , arrow::matlab::array::proxy::NumericArray<arrow::Time32Type>);
-  REGISTER_PROXY(arrow.array.proxy.Time64Array     , arrow::matlab::array::proxy::NumericArray<arrow::Time64Type>);
-  REGISTER_PROXY(arrow.array.proxy.Date32Array     , arrow::matlab::array::proxy::NumericArray<arrow::Date32Type>);
-  REGISTER_PROXY(arrow.array.proxy.Date64Array     , arrow::matlab::array::proxy::NumericArray<arrow::Date64Type>);
-  REGISTER_PROXY(arrow.array.proxy.ChunkedArray    , arrow::matlab::array::proxy::ChunkedArray);
-  REGISTER_PROXY(arrow.buffer.proxy.Buffer         , arrow::matlab::buffer::proxy::Buffer);
-  REGISTER_PROXY(arrow.tabular.proxy.RecordBatch   , arrow::matlab::tabular::proxy::RecordBatch);
-  REGISTER_PROXY(arrow.tabular.proxy.Table         , arrow::matlab::tabular::proxy::Table);
-  REGISTER_PROXY(arrow.tabular.proxy.Schema        , arrow::matlab::tabular::proxy::Schema);
-  REGISTER_PROXY(arrow.type.proxy.Field            , arrow::matlab::type::proxy::Field);
-  REGISTER_PROXY(arrow.type.proxy.Float32Type      , arrow::matlab::type::proxy::PrimitiveCType<float>);
-  REGISTER_PROXY(arrow.type.proxy.Float64Type      , arrow::matlab::type::proxy::PrimitiveCType<double>);
-  REGISTER_PROXY(arrow.type.proxy.UInt8Type        , arrow::matlab::type::proxy::PrimitiveCType<uint8_t>);
-  REGISTER_PROXY(arrow.type.proxy.UInt16Type       , arrow::matlab::type::proxy::PrimitiveCType<uint16_t>);
-  REGISTER_PROXY(arrow.type.proxy.UInt32Type       , arrow::matlab::type::proxy::PrimitiveCType<uint32_t>);
-  REGISTER_PROXY(arrow.type.proxy.UInt64Type       , arrow::matlab::type::proxy::PrimitiveCType<uint64_t>);
-  REGISTER_PROXY(arrow.type.proxy.Int8Type         , arrow::matlab::type::proxy::PrimitiveCType<int8_t>);
-  REGISTER_PROXY(arrow.type.proxy.Int16Type        , arrow::matlab::type::proxy::PrimitiveCType<int16_t>);
-  REGISTER_PROXY(arrow.type.proxy.Int32Type        , arrow::matlab::type::proxy::PrimitiveCType<int32_t>);
-  REGISTER_PROXY(arrow.type.proxy.Int64Type        , arrow::matlab::type::proxy::PrimitiveCType<int64_t>);
-  REGISTER_PROXY(arrow.type.proxy.BooleanType      , arrow::matlab::type::proxy::PrimitiveCType<bool>);
-  REGISTER_PROXY(arrow.type.proxy.StringType       , arrow::matlab::type::proxy::StringType);
-  REGISTER_PROXY(arrow.type.proxy.TimestampType    , arrow::matlab::type::proxy::TimestampType);
-  REGISTER_PROXY(arrow.type.proxy.Time32Type       , arrow::matlab::type::proxy::Time32Type);
-  REGISTER_PROXY(arrow.type.proxy.Time64Type       , arrow::matlab::type::proxy::Time64Type);
-  REGISTER_PROXY(arrow.type.proxy.Date32Type       , arrow::matlab::type::proxy::Date32Type);
-  REGISTER_PROXY(arrow.type.proxy.Date64Type       , arrow::matlab::type::proxy::Date64Type);
-  REGISTER_PROXY(arrow.type.proxy.StructType       , arrow::matlab::type::proxy::StructType);
-  REGISTER_PROXY(arrow.type.proxy.ListType         , arrow::matlab::type::proxy::ListType);
-  REGISTER_PROXY(arrow.io.feather.proxy.Writer     , arrow::matlab::io::feather::proxy::Writer);
-  REGISTER_PROXY(arrow.io.feather.proxy.Reader     , arrow::matlab::io::feather::proxy::Reader);
-  REGISTER_PROXY(arrow.io.csv.proxy.TableWriter    , arrow::matlab::io::csv::proxy::TableWriter);
-  REGISTER_PROXY(arrow.io.csv.proxy.TableReader    , arrow::matlab::io::csv::proxy::TableReader);
+  REGISTER_PROXY(arrow.array.proxy.Float32Array        , arrow::matlab::array::proxy::NumericArray<arrow::FloatType>);
+  REGISTER_PROXY(arrow.array.proxy.Float64Array        , arrow::matlab::array::proxy::NumericArray<arrow::DoubleType>);
+  REGISTER_PROXY(arrow.array.proxy.UInt8Array          , arrow::matlab::array::proxy::NumericArray<arrow::UInt8Type>);
+  REGISTER_PROXY(arrow.array.proxy.UInt16Array         , arrow::matlab::array::proxy::NumericArray<arrow::UInt16Type>);
+  REGISTER_PROXY(arrow.array.proxy.UInt32Array         , arrow::matlab::array::proxy::NumericArray<arrow::UInt32Type>);
+  REGISTER_PROXY(arrow.array.proxy.UInt64Array         , arrow::matlab::array::proxy::NumericArray<arrow::UInt64Type>);
+  REGISTER_PROXY(arrow.array.proxy.Int8Array           , arrow::matlab::array::proxy::NumericArray<arrow::Int8Type>);
+  REGISTER_PROXY(arrow.array.proxy.Int16Array          , arrow::matlab::array::proxy::NumericArray<arrow::Int16Type>);
+  REGISTER_PROXY(arrow.array.proxy.Int32Array          , arrow::matlab::array::proxy::NumericArray<arrow::Int32Type>);
+  REGISTER_PROXY(arrow.array.proxy.Int64Array          , arrow::matlab::array::proxy::NumericArray<arrow::Int64Type>);
+  REGISTER_PROXY(arrow.array.proxy.BooleanArray        , arrow::matlab::array::proxy::BooleanArray);
+  REGISTER_PROXY(arrow.array.proxy.StringArray         , arrow::matlab::array::proxy::StringArray);
+  REGISTER_PROXY(arrow.array.proxy.StructArray         , arrow::matlab::array::proxy::StructArray);
+  REGISTER_PROXY(arrow.array.proxy.ListArray           , arrow::matlab::array::proxy::ListArray);
+  REGISTER_PROXY(arrow.array.proxy.TimestampArray      , arrow::matlab::array::proxy::NumericArray<arrow::TimestampType>);
+  REGISTER_PROXY(arrow.array.proxy.Time32Array         , arrow::matlab::array::proxy::NumericArray<arrow::Time32Type>);
+  REGISTER_PROXY(arrow.array.proxy.Time64Array         , arrow::matlab::array::proxy::NumericArray<arrow::Time64Type>);
+  REGISTER_PROXY(arrow.array.proxy.Date32Array         , arrow::matlab::array::proxy::NumericArray<arrow::Date32Type>);
+  REGISTER_PROXY(arrow.array.proxy.Date64Array         , arrow::matlab::array::proxy::NumericArray<arrow::Date64Type>);
+  REGISTER_PROXY(arrow.array.proxy.ChunkedArray        , arrow::matlab::array::proxy::ChunkedArray);
+  REGISTER_PROXY(arrow.buffer.proxy.Buffer             , arrow::matlab::buffer::proxy::Buffer);
+  REGISTER_PROXY(arrow.tabular.proxy.RecordBatch       , arrow::matlab::tabular::proxy::RecordBatch);
+  REGISTER_PROXY(arrow.tabular.proxy.Table             , arrow::matlab::tabular::proxy::Table);
+  REGISTER_PROXY(arrow.tabular.proxy.Schema            , arrow::matlab::tabular::proxy::Schema);
+  REGISTER_PROXY(arrow.type.proxy.Field                , arrow::matlab::type::proxy::Field);
+  REGISTER_PROXY(arrow.type.proxy.Float32Type          , arrow::matlab::type::proxy::PrimitiveCType<float>);
+  REGISTER_PROXY(arrow.type.proxy.Float64Type          , arrow::matlab::type::proxy::PrimitiveCType<double>);
+  REGISTER_PROXY(arrow.type.proxy.UInt8Type            , arrow::matlab::type::proxy::PrimitiveCType<uint8_t>);
+  REGISTER_PROXY(arrow.type.proxy.UInt16Type           , arrow::matlab::type::proxy::PrimitiveCType<uint16_t>);
+  REGISTER_PROXY(arrow.type.proxy.UInt32Type           , arrow::matlab::type::proxy::PrimitiveCType<uint32_t>);
+  REGISTER_PROXY(arrow.type.proxy.UInt64Type           , arrow::matlab::type::proxy::PrimitiveCType<uint64_t>);
+  REGISTER_PROXY(arrow.type.proxy.Int8Type             , arrow::matlab::type::proxy::PrimitiveCType<int8_t>);
+  REGISTER_PROXY(arrow.type.proxy.Int16Type            , arrow::matlab::type::proxy::PrimitiveCType<int16_t>);
+  REGISTER_PROXY(arrow.type.proxy.Int32Type            , arrow::matlab::type::proxy::PrimitiveCType<int32_t>);
+  REGISTER_PROXY(arrow.type.proxy.Int64Type            , arrow::matlab::type::proxy::PrimitiveCType<int64_t>);
+  REGISTER_PROXY(arrow.type.proxy.BooleanType          , arrow::matlab::type::proxy::PrimitiveCType<bool>);
+  REGISTER_PROXY(arrow.type.proxy.StringType           , arrow::matlab::type::proxy::StringType);
+  REGISTER_PROXY(arrow.type.proxy.TimestampType        , arrow::matlab::type::proxy::TimestampType);
+  REGISTER_PROXY(arrow.type.proxy.Time32Type           , arrow::matlab::type::proxy::Time32Type);
+  REGISTER_PROXY(arrow.type.proxy.Time64Type           , arrow::matlab::type::proxy::Time64Type);
+  REGISTER_PROXY(arrow.type.proxy.Date32Type           , arrow::matlab::type::proxy::Date32Type);
+  REGISTER_PROXY(arrow.type.proxy.Date64Type           , arrow::matlab::type::proxy::Date64Type);
+  REGISTER_PROXY(arrow.type.proxy.StructType           , arrow::matlab::type::proxy::StructType);
+  REGISTER_PROXY(arrow.type.proxy.ListType             , arrow::matlab::type::proxy::ListType);
+  REGISTER_PROXY(arrow.io.feather.proxy.Writer         , arrow::matlab::io::feather::proxy::Writer);
+  REGISTER_PROXY(arrow.io.feather.proxy.Reader         , arrow::matlab::io::feather::proxy::Reader);
+  REGISTER_PROXY(arrow.io.csv.proxy.TableWriter        , arrow::matlab::io::csv::proxy::TableWriter);
+  REGISTER_PROXY(arrow.io.csv.proxy.TableReader        , arrow::matlab::io::csv::proxy::TableReader);
+  REGISTER_PROXY(arrow.c.proxy.Array                   , arrow::matlab::c::proxy::Array);
+  REGISTER_PROXY(arrow.c.proxy.ArrayImporter           , arrow::matlab::c::proxy::ArrayImporter);
+  REGISTER_PROXY(arrow.c.proxy.Schema                  , arrow::matlab::c::proxy::Schema);
+  REGISTER_PROXY(arrow.c.proxy.RecordBatchImporter     , arrow::matlab::c::proxy::RecordBatchImporter);
   // clang-format on
 
   return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID,
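With the four new registrations in place, the MATLAB layer can construct the proxies by name; the arrow.internal.proxy.create helper used by the classes below resolves through this factory. Sketch:

    proxy = arrow.internal.proxy.create("arrow.c.proxy.Array");
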
diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc
index 298ac4b595139..f3cee25a3a8ee 100644
--- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc
+++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc
@@ -15,8 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "libmexclass/proxy/ProxyManager.h"
-
+#include "arrow/c/bridge.h"
 #include "arrow/matlab/array/proxy/array.h"
 #include "arrow/matlab/array/proxy/wrap.h"
@@ -66,6 +65,7 @@ RecordBatch::RecordBatch(std::shared_ptr<arrow::RecordBatch> record_batch)
   REGISTER_METHOD(RecordBatch, getColumnByName);
   REGISTER_METHOD(RecordBatch, getSchema);
   REGISTER_METHOD(RecordBatch, getRowAsString);
+  REGISTER_METHOD(RecordBatch, exportToC);
 }
 
 std::shared_ptr<arrow::RecordBatch> RecordBatch::unwrap() { return record_batch; }
@@ -259,4 +259,19 @@ void RecordBatch::getRowAsString(libmexclass::proxy::method::Context& context) {
   context.outputs[0] = factory.createScalar(row_str_utf16);
 }
 
+void RecordBatch::exportToC(libmexclass::proxy::method::Context& context) {
+  namespace mda = ::matlab::data;
+  mda::StructArray opts = context.inputs[0];
+  const mda::TypedArray<uint64_t> array_address_mda = opts[0]["ArrowArrayAddress"];
+  const mda::TypedArray<uint64_t> schema_address_mda = opts[0]["ArrowSchemaAddress"];
+
+  auto arrow_array = reinterpret_cast<struct ArrowArray*>(uint64_t(array_address_mda[0]));
+  auto arrow_schema =
+      reinterpret_cast<struct ArrowSchema*>(uint64_t(schema_address_mda[0]));
+
+  MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(
+      arrow::ExportRecordBatch(*record_batch, arrow_array, arrow_schema), context,
+      error::C_EXPORT_FAILED);
+}
+
 }  // namespace arrow::matlab::tabular::proxy
diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
index c8285c9b095d5..4a1675a8a438a 100644
--- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
+++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
@@ -43,6 +43,7 @@ class RecordBatch : public libmexclass::proxy::Proxy {
   void getColumnByName(libmexclass::proxy::method::Context& context);
   void getSchema(libmexclass::proxy::method::Context& context);
   void getRowAsString(libmexclass::proxy::method::Context& context);
+  void exportToC(libmexclass::proxy::method::Context& context);
 
   std::shared_ptr<arrow::RecordBatch> record_batch;
 };
diff --git a/matlab/src/matlab/+arrow/+array/Array.m b/matlab/src/matlab/+arrow/+array/Array.m
index 4402055932b60..01bacdf5755dc 100644
--- a/matlab/src/matlab/+arrow/+array/Array.m
+++ b/matlab/src/matlab/+arrow/+array/Array.m
@@ -97,6 +97,19 @@ function displayScalarObject(obj)
             % Invoke isEqual proxy object method
             tf = obj.Proxy.isEqual(proxyIDs);
         end
+
+        function export(obj, cArrowArrayAddress, cArrowSchemaAddress)
+            arguments
+                obj(1, 1) arrow.array.Array
+                cArrowArrayAddress(1, 1) uint64
+                cArrowSchemaAddress(1, 1) uint64
+            end
+            args = 
struct(... + ArrowArrayAddress=cArrowArrayAddress,... + ArrowSchemaAddress=cArrowSchemaAddress... + ); + obj.Proxy.exportToC(args); + end end methods (Hidden) @@ -108,4 +121,15 @@ function displayScalarObject(obj) array = traits.ArrayConstructor(proxy); end end + + methods (Static) + function array = import(cArray, cSchema) + arguments + cArray(1, 1) arrow.c.Array + cSchema(1, 1) arrow.c.Schema + end + importer = arrow.c.internal.ArrayImporter(); + array = importer.import(cArray, cSchema); + end + end end diff --git a/matlab/src/matlab/+arrow/+c/+internal/ArrayImporter.m b/matlab/src/matlab/+arrow/+c/+internal/ArrayImporter.m new file mode 100644 index 0000000000000..3f2f7445b3d6d --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/+internal/ArrayImporter.m @@ -0,0 +1,50 @@ +%ARRAYIMPORTER Imports Arrow Array using the C Data Interface Format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef ArrayImporter < matlab.mixin.Scalar + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + methods + + function obj = ArrayImporter() + proxyName = "arrow.c.proxy.ArrayImporter"; + proxy = arrow.internal.proxy.create(proxyName, struct()); + obj.Proxy = proxy; + end + + function array = import(obj, cArray, cSchema) + arguments + obj(1, 1) arrow.c.internal.ArrayImporter + cArray(1, 1) arrow.c.Array + cSchema(1, 1) arrow.c.Schema + end + args = struct(... + ArrowArrayAddress=cArray.Address,... + ArrowSchemaAddress=cSchema.Address... + ); + [proxyID, typeID] = obj.Proxy.import(args); + traits = arrow.type.traits.traits(arrow.type.ID(typeID)); + proxy = libmexclass.proxy.Proxy(Name=traits.ArrayProxyClassName, ID=proxyID); + array = traits.ArrayConstructor(proxy); + end + + end + +end + diff --git a/matlab/src/matlab/+arrow/+c/+internal/RecordBatchImporter.m b/matlab/src/matlab/+arrow/+c/+internal/RecordBatchImporter.m new file mode 100644 index 0000000000000..120763bb46e7b --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/+internal/RecordBatchImporter.m @@ -0,0 +1,52 @@ +%RECORDBATCHIMPORTER Imports Arrow RecordBatch using the C Data Interface +% Format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. 
See the License for the specific language governing +% permissions and limitations under the License. + +classdef RecordBatchImporter + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + methods + + function obj = RecordBatchImporter() + proxyName = "arrow.c.proxy.RecordBatchImporter"; + proxy = arrow.internal.proxy.create(proxyName, struct()); + obj.Proxy = proxy; + end + + function recordBatch = import(obj, cArray, cSchema) + arguments + obj(1, 1) arrow.c.internal.RecordBatchImporter + cArray(1, 1) arrow.c.Array + cSchema(1, 1) arrow.c.Schema + end + args = struct(... + ArrowArrayAddress=cArray.Address,... + ArrowSchemaAddress=cSchema.Address... + ); + proxyID = obj.Proxy.import(args); + proxyName = "arrow.tabular.proxy.RecordBatch"; + proxy = libmexclass.proxy.Proxy(Name=proxyName, ID=proxyID); + recordBatch = arrow.tabular.RecordBatch(proxy); + end + + end + +end + diff --git a/matlab/src/matlab/+arrow/+c/Array.m b/matlab/src/matlab/+arrow/+c/Array.m new file mode 100644 index 0000000000000..574fca9afebd8 --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/Array.m @@ -0,0 +1,37 @@ +%ARRAY Wrapper for an Arrow C Data Interface format ArrowArray C struct pointer. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef Array < matlab.mixin.Scalar + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + properties(Dependent, GetAccess=public, SetAccess=private) + Address(1, 1) uint64 + end + + methods + function obj = Array() + proxyName = "arrow.c.proxy.Array"; + obj.Proxy = arrow.internal.proxy.create(proxyName); + end + + function address = get.Address(obj) + address = obj.Proxy.getAddress(); + end + end +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+c/Schema.m b/matlab/src/matlab/+arrow/+c/Schema.m new file mode 100644 index 0000000000000..29eba59016044 --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/Schema.m @@ -0,0 +1,37 @@ +%SCHEMA Wrapper for an Arrow C Data Interface format ArrowSchema C struct pointer. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+classdef Schema < matlab.mixin.Scalar + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + properties(Dependent, GetAccess=public, SetAccess=private) + Address(1, 1) uint64 + end + + methods + function obj = Schema() + proxyName = "arrow.c.proxy.Schema"; + obj.Proxy = arrow.internal.proxy.create(proxyName); + end + + function address = get.Address(obj) + address = obj.Proxy.getAddress(); + end + end +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+internal/+test/+display/makeLinkString.m b/matlab/src/matlab/+arrow/+internal/+test/+display/makeLinkString.m index 79065ba1c8cfd..e99dd7d78488d 100644 --- a/matlab/src/matlab/+arrow/+internal/+test/+display/makeLinkString.m +++ b/matlab/src/matlab/+arrow/+internal/+test/+display/makeLinkString.m @@ -26,11 +26,11 @@ end if opts.BoldFont - link = compose("%s", ... opts.FullClassName, opts.ClassName); else - link = compose("%s", ... + link = compose("%s", ... opts.FullClassName, opts.ClassName); end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+tabular/+internal/+display/getSchemaString.m b/matlab/src/matlab/+arrow/+tabular/+internal/+display/getSchemaString.m index 7da945ca993ef..724b4873c92e1 100644 --- a/matlab/src/matlab/+arrow/+tabular/+internal/+display/getSchemaString.m +++ b/matlab/src/matlab/+arrow/+tabular/+internal/+display/getSchemaString.m @@ -43,7 +43,7 @@ classNameAndIDs = strings([1 numel(typeIDs) * 2]); classNameAndIDs(1:2:end-1) = classNames; classNameAndIDs(2:2:end) = typeIDs; - typeIDs = compose("%s", classNameAndIDs); + typeIDs = compose("%s", classNameAndIDs); end text = names + ": " + typeIDs; diff --git a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m index 0225f3d771181..da5c1fc1c3764 100644 --- a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m +++ b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m @@ -102,6 +102,19 @@ function tf = isequal(obj, varargin) tf = arrow.tabular.internal.isequal(obj, varargin{:}); end + + function export(obj, cArrowArrayAddress, cArrowSchemaAddress) + arguments + obj(1, 1) arrow.tabular.RecordBatch + cArrowArrayAddress(1, 1) uint64 + cArrowSchemaAddress(1, 1) uint64 + end + args = struct(... + ArrowArrayAddress=cArrowArrayAddress,... + ArrowSchemaAddress=cArrowSchemaAddress... + ); + obj.Proxy.exportToC(args); + end end methods (Access = private) @@ -141,5 +154,14 @@ function displayScalarObject(obj) proxy = arrow.internal.proxy.create(proxyName, args); recordBatch = arrow.tabular.RecordBatch(proxy); end + + function recordBatch = import(cArray, cSchema) + arguments + cArray(1, 1) arrow.c.Array + cSchema(1, 1) arrow.c.Schema + end + importer = arrow.c.internal.RecordBatchImporter(); + recordBatch = importer.import(cArray, cSchema); + end end end diff --git a/matlab/test/arrow/c/tArray.m b/matlab/test/arrow/c/tArray.m new file mode 100644 index 0000000000000..f8caf48065114 --- /dev/null +++ b/matlab/test/arrow/c/tArray.m @@ -0,0 +1,48 @@ +%TARRAY Defines unit tests for arrow.c.Array. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. 
You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef tArray < matlab.unittest.TestCase + + methods (Test) + function TestClassStructure(testCase) + array = arrow.c.Array(); + + % Verify array is an instance of arrow.c.Array. + testCase.verifyInstanceOf(array, "arrow.c.Array"); + + % Verify array has one public property named Address. + props = properties(array); + testCase.verifyEqual(props, {'Address'}); + end + + function TestAddressProperty(testCase) + array = arrow.c.Array(); + + % It's impossible to know what the value of Address will be. + % Just verify Address is a scalar uint64. + address = array.Address; + testCase.verifyInstanceOf(address, "uint64"); + testCase.verifyTrue(isscalar(address)); + end + + function TestAddressNoSetter(testCase) + % Verify the Address property is read-only. + array = arrow.c.Array(); + fcn = @() setfield(array, "Address", uint64(10)); + testCase.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + end +end \ No newline at end of file diff --git a/matlab/test/arrow/c/tRoundTrip.m b/matlab/test/arrow/c/tRoundTrip.m new file mode 100644 index 0000000000000..a72dbe2679a2d --- /dev/null +++ b/matlab/test/arrow/c/tRoundTrip.m @@ -0,0 +1,182 @@ +%TROUNDTRIP Tests for roundtripping using the C Data Interface format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
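(Aside: the round-trip that tRoundTrip.m below exercises is the same address-based handshake used by the other Arrow bindings. For reference, a minimal pyarrow sketch of the identical flow, assuming pyarrow with its documented cffi helper is available; allocating the two C structs and passing their raw addresses mirrors what arrow.c.Array and arrow.c.Schema do on the MATLAB side.)

import pyarrow as pa
from pyarrow.cffi import ffi

# Allocate empty C Data Interface structs and take their raw addresses,
# just like arrow.c.Array/arrow.c.Schema expose Address in MATLAB.
c_schema = ffi.new("struct ArrowSchema*")
c_array = ffi.new("struct ArrowArray*")
schema_ptr = int(ffi.cast("uintptr_t", c_schema))
array_ptr = int(ffi.cast("uintptr_t", c_array))

expected = pa.array([1.0, None, 3.0])
expected._export_to_c(array_ptr, schema_ptr)              # producer fills both structs
actual = pa.Array._import_from_c(array_ptr, schema_ptr)   # consumer takes ownership
assert actual.equals(expected)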
+classdef tRoundTrip < matlab.unittest.TestCase + + methods (Test) + + function EmptyArray(testCase) + expected = arrow.array(double.empty(0, 1)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function ArrayWithNulls(testCase) + % Scalar null + expected = arrow.array(double(NaN)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector with nulls + expected = arrow.array([1, NaN, 3, NaN, 5]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector all nulls + expected = arrow.array([NaN, NaN, NaN]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function Float64Array(testCase) + % Scalar + expected = arrow.array(double(1)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector + expected = arrow.array([1, 2, 3]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function StringArray(testCase) + % Scalar + expected = arrow.array("A"); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector + expected = arrow.array(["A", "B", "C"]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function TimestampArray(testCase) + % Scalar + expected = arrow.array(datetime(2024, 1, 1)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector + expected = arrow.array([... + datetime(2024, 1, 1),... + datetime(2024, 1, 2),... + datetime(2024, 1, 3)... 
+ ]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function ExportErrorWrongInputTypes(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() A.export("cArray.Address", "cSchema.Address"); + testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function ExportTooFewInputs(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() A.export(); + testCase.verifyError(fcn, "MATLAB:minrhs"); + end + + function ExportTooManyInputs(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() A.export("A", "B", "C"); + testCase.verifyError(fcn, "MATLAB:TooManyInputs"); + end + + function ImportErrorWrongInputTypes(testCase) + cArray = "arrow.c.Array"; + cSchema = "arrow.c.Schema"; + fcn = @() arrow.array.Array.import(cArray, cSchema); + testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function ImportTooFewInputs(testCase) + fcn = @() arrow.array.Array.import(); + testCase.verifyError(fcn, "MATLAB:minrhs"); + end + + function ImportTooManyInputs(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() arrow.array.Array.import("A", "B", "C"); + testCase.verifyError(fcn, "MATLAB:TooManyInputs"); + end + + function ImportErrorImportFailed(testCase) + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + % An arrow:c:import:ImportFailed error should be thrown + % if the supplied arrow.c.Array and arrow.c.Schema were + % never populated previously from an exported Array. + fcn = @() arrow.array.Array.import(cArray, cSchema); + testCase.verifyError(fcn, "arrow:c:import:ImportFailed"); + end + + end + +end diff --git a/matlab/test/arrow/c/tRoundTripRecordBatch.m b/matlab/test/arrow/c/tRoundTripRecordBatch.m new file mode 100644 index 0000000000000..5d95aecbe1603 --- /dev/null +++ b/matlab/test/arrow/c/tRoundTripRecordBatch.m @@ -0,0 +1,170 @@ +%TROUNDTRIPRECORDBATCH Tests for roundtripping RecordBatches using +% the C Data Interface format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
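(Aside: tRoundTripRecordBatch.m below also checks that a struct-typed array and a record batch are interchangeable across the C Data Interface. The same duality can be sketched from the pyarrow side; a hedged example, with names chosen for illustration only.)

import pyarrow as pa
from pyarrow.cffi import ffi

c_schema = ffi.new("struct ArrowSchema*")
c_array = ffi.new("struct ArrowArray*")
schema_ptr = int(ffi.cast("uintptr_t", c_schema))
array_ptr = int(ffi.cast("uintptr_t", c_array))

# Export a StructArray, then import the very same structs as a RecordBatch.
struct_array = pa.StructArray.from_arrays(
    [pa.array([1, 2, 3]), pa.array(["A", "B", "C"])],
    names=["Number", "Text"])
struct_array._export_to_c(array_ptr, schema_ptr)
batch = pa.RecordBatch._import_from_c(array_ptr, schema_ptr)
assert batch.schema.names == ["Number", "Text"]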
+classdef tRoundTripRecordBatch < matlab.unittest.TestCase + + methods (Test) + function ZeroColumnRecordBatch(testCase) + expected = arrow.recordBatch(table()); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + expected.export(cArray.Address, cSchema.Address); + actual = arrow.tabular.RecordBatch.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function ZeroRowRecordBatch(testCase) + doubleArray = arrow.array([]); + stringArray = arrow.array(string.empty(0, 0)); + expected = arrow.tabular.RecordBatch.fromArrays(doubleArray, stringArray); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + expected.export(cArray.Address, cSchema.Address); + actual = arrow.tabular.RecordBatch.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function OneRowRecordBatch(testCase) + varNames = ["Col1" "Col2" "Col3"]; + t = table(1, "A", false, VariableNames=varNames); + expected = arrow.recordBatch(t); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + expected.export(cArray.Address, cSchema.Address); + actual = arrow.tabular.RecordBatch.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function MultiRowRecordBatch(testCase) + varNames = ["Col1" "Col2" "Col3"]; + t = table((1:3)', ["A"; "B"; "C"], [false; true; false],... + VariableNames=varNames); + expected = arrow.recordBatch(t); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + expected.export(cArray.Address, cSchema.Address); + actual = arrow.tabular.RecordBatch.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function ExportErrorWrongInputTypes(testCase) + rb = arrow.recordBatch(table([1; 2; 3])); + fcn = @() rb.export("cArray.Address", "cSchema.Address"); + testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function ExportTooFewInputs(testCase) + rb = arrow.recordBatch(table([1; 2; 3])); + fcn = @() rb.export(); + testCase.verifyError(fcn, "MATLAB:minrhs"); + end + + function ExportTooManyInputs(testCase) + rb = arrow.recordBatch(table([1; 2; 3])); + fcn = @() rb.export("A", "B", "C"); + testCase.verifyError(fcn, "MATLAB:TooManyInputs"); + end + + function ImportErrorWrongInputTypes(testCase) + cArray = "arrow.c.Array"; + cSchema = "arrow.c.Schema"; + fcn = @() arrow.tabular.RecordBatch.import(cArray, cSchema); + testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function ImportTooFewInputs(testCase) + fcn = @() arrow.tabular.RecordBatch.import(); + testCase.verifyError(fcn, "MATLAB:minrhs"); + end + + function ImportTooManyInputs(testCase) + fcn = @() arrow.tabular.RecordBatch.import("A", "B", "C"); + testCase.verifyError(fcn, "MATLAB:TooManyInputs"); + end + + function ImportErrorImportFailed(testCase) + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + % An arrow:c:import:ImportFailed error should be thrown + % if the supplied arrow.c.Array and arrow.c.Schema were + % never populated previously from an exported Array. + fcn = @() arrow.tabular.RecordBatch.import(cArray, cSchema); + testCase.verifyError(fcn, "arrow:c:import:ImportFailed"); + end + + function ImportErrorInvalidSchema(testCase) + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + % An arrow:c:import:ImportFailed error should be thrown + % if the supplied arrow.c.Schema was not populated from a + % struct-like type (i.e. StructArray or RecordBatch). 
+ a = arrow.array(1:3); + a.export(cArray.Address, cSchema.Address); + fcn = @() arrow.tabular.RecordBatch.import(cArray, cSchema); + testCase.verifyError(fcn, "arrow:c:import:ImportFailed"); + end + + function ImportFromStructArray(testCase) + % Verify a StructArray exported via the C Data Interface format + % can be imported as a RecordBatch. + field1 = arrow.array(1:3); + + field2 = arrow.array(["A" "B" "C"]); + structArray = arrow.array.StructArray.fromArrays(field1, field2, ... + FieldNames=["Number" "Text"]); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + structArray.export(cArray.Address, cSchema.Address) + rb = arrow.tabular.RecordBatch.import(cArray, cSchema); + + expected = arrow.tabular.RecordBatch.fromArrays(field1, field2, ... + ColumnNames=["Number" "Text"]); + + testCase.verifyEqual(rb, expected); + end + + function ExportToStructArray(testCase) + % Verify a RecordBatch exported via the C Data Interface + % format can be imported as a StructArray. + column1 = arrow.array(1:3); + column2 = arrow.array(["A" "B" "C"]); + rb = arrow.tabular.RecordBatch.fromArrays(column1, column2, ... + ColumnNames=["Number" "Text"]); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + rb.export(cArray.Address, cSchema.Address) + structArray = arrow.array.Array.import(cArray, cSchema); + + expected = arrow.array.StructArray.fromArrays(column1, column2, ... + FieldNames=["Number" "Text"]); + + testCase.verifyEqual(structArray, expected); + end + + end + +end \ No newline at end of file diff --git a/matlab/test/arrow/c/tSchema.m b/matlab/test/arrow/c/tSchema.m new file mode 100644 index 0000000000000..16dcf1965b463 --- /dev/null +++ b/matlab/test/arrow/c/tSchema.m @@ -0,0 +1,48 @@ +%TSCHEMA Defines unit tests for arrow.c.Schema. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef tSchema < matlab.unittest.TestCase + + methods (Test) + function TestClassStructure(testCase) + schema = arrow.c.Schema(); + + % Verify schema is an instance of arrow.c.Schema. + testCase.verifyInstanceOf(schema, "arrow.c.Schema"); + + % Verify schema has one public property named Address. + props = properties(schema); + testCase.verifyEqual(props, {'Address'}); + end + + function TestAddressProperty(testCase) + schema = arrow.c.Schema(); + + % It's impossible to know what the value of Address will be. + % Just verify Address is a scalar uint64. + address = schema.Address; + testCase.verifyInstanceOf(address, "uint64"); + testCase.verifyTrue(isscalar(address)); + end + + function TestAddressNoSetter(testCase) + % Verify the Address property is read-only. 
+            schema = arrow.c.Schema();
+            fcn = @() setfield(schema, "Address", uint64(10));
+            testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
+        end
+    end
+end
\ No newline at end of file
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index cb746e08b1f8e..0a747e648cd84 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -24,8 +24,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_NAME libmexclass)
 # libmexclass is accessible for CI without permission issues.
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_REPOSITORY "https://github.com/mathworks/libmexclass.git")
 # Use a specific Git commit hash to avoid libmexclass version changing unexpectedly.
-set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_TAG "d04f88d")
-
+set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_TAG "ca3cea6")
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_SOURCE_SUBDIR "libmexclass/cpp")
 # ------------------------------------------
@@ -76,7 +75,11 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a
     "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/csv/proxy/table_writer.cc"
     "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/csv/proxy/table_reader.cc"
     "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/index/validate.cc"
-    "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer/proxy/buffer.cc")
+    "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer/proxy/buffer.cc"
+    "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array.cc"
+    "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array_importer.cc"
+    "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/schema.cc"
+    "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc")
 
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")
diff --git a/matlab/tools/packageMatlabInterface.m b/matlab/tools/packageMatlabInterface.m
index 55b4d4241a569..3d970002614ab 100644
--- a/matlab/tools/packageMatlabInterface.m
+++ b/matlab/tools/packageMatlabInterface.m
@@ -55,9 +55,18 @@ opts.SupportedPlatforms.Glnxa64 = true;
 opts.SupportedPlatforms.MatlabOnline = true;
 
-% Interface is only qualified against R2023a at the moment
-opts.MinimumMatlabRelease = "R2023a";
-opts.MaximumMatlabRelease = "R2023a";
+% MEX files use run-time libraries shipped with MATLAB (e.g. libmx, libmex,
+% etc.). MEX files linked against earlier versions of MATLAB run-time libraries
+% will most likely work on newer versions of MATLAB. However, this may not
+% always be the case.
+%
+% For now, set the earliest and latest compatible releases of MATLAB to
+% the release of MATLAB used to build and package the MATLAB Arrow Interface.
+% +% See: https://www.mathworks.com/help/matlab/matlab_external/version-compatibility.html +currentRelease = matlabRelease.Release; +opts.MinimumMatlabRelease = currentRelease; +opts.MaximumMatlabRelease = currentRelease; opts.OutputFile = fullfile(outputFolder, compose("matlab-arrow-%s.mltbx", toolboxVersionRaw)); disp("Output File: " + opts.OutputFile); diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 212862357ace2..a8bbed117163d 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -108,25 +108,6 @@ if(UNIX) endif() endif() -# Top level cmake dir -if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") - option(PYARROW_BUILD_ACERO "Build the PyArrow Acero integration" OFF) - option(PYARROW_BUILD_CUDA "Build the PyArrow CUDA support" OFF) - option(PYARROW_BUILD_DATASET "Build the PyArrow Dataset integration" OFF) - option(PYARROW_BUILD_FLIGHT "Build the PyArrow Flight integration" OFF) - option(PYARROW_BUILD_GANDIVA "Build the PyArrow Gandiva integration" OFF) - option(PYARROW_BUILD_ORC "Build the PyArrow ORC integration" OFF) - option(PYARROW_BUILD_PARQUET "Build the PyArrow Parquet integration" OFF) - option(PYARROW_BUILD_PARQUET_ENCRYPTION - "Build the PyArrow Parquet encryption integration" OFF) - option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF) - option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython" OFF) - option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF) - set(PYARROW_CXXFLAGS - "" - CACHE STRING "Compiler flags to append when compiling Arrow") -endif() - find_program(CCACHE_FOUND ccache) if(CCACHE_FOUND AND NOT CMAKE_C_COMPILER_LAUNCHER @@ -265,11 +246,70 @@ message(STATUS "NumPy include dir: ${NUMPY_INCLUDE_DIRS}") include(UseCython) -# PyArrow C++ +# Arrow C++ and set default PyArrow build options include(GNUInstallDirs) - find_package(Arrow REQUIRED) +macro(define_option name description arrow_option) + set("PYARROW_${name}" + "AUTO" + CACHE STRING ${description}) + + if("${PYARROW_${name}}" STREQUAL "AUTO") + # by default, first check if env variable exists, otherwise use Arrow C++ config + set(env_variable "PYARROW_WITH_${name}") + if(DEFINED ENV{${env_variable}}) + if($ENV{${env_variable}}) + set("PYARROW_BUILD_${name}" ON) + else() + set("PYARROW_BUILD_${name}" OFF) + endif() + else() + if(${arrow_option}) + set("PYARROW_BUILD_${name}" ON) + else() + set("PYARROW_BUILD_${name}" OFF) + endif() + endif() + else() + if("${PYARROW_${name}}") + set("PYARROW_BUILD_${name}" ON) + else() + set("PYARROW_BUILD_${name}" OFF) + endif() + endif() +endmacro() + +define_option(ACERO "Build the PyArrow Acero integration" ARROW_ACERO) +define_option(CUDA "Build the PyArrow CUDA support" ARROW_CUDA) +define_option(DATASET "Build the PyArrow Dataset integration" ARROW_DATASET) +define_option(FLIGHT "Build the PyArrow Flight integration" ARROW_FLIGHT) +define_option(GANDIVA "Build the PyArrow Gandiva integration" ARROW_GANDIVA) +define_option(ORC "Build the PyArrow ORC integration" ARROW_ORC) +define_option(PARQUET "Build the PyArrow Parquet integration" ARROW_PARQUET) +define_option(PARQUET_ENCRYPTION "Build the PyArrow Parquet encryption integration" + PARQUET_REQUIRE_ENCRYPTION) +define_option(SUBSTRAIT "Build the PyArrow Substrait integration" ARROW_SUBSTRAIT) +define_option(AZURE "Build the PyArrow Azure integration" ARROW_AZURE) +define_option(GCS "Build the PyArrow GCS integration" ARROW_GCS) +define_option(S3 "Build the PyArrow S3 integration" ARROW_S3) +define_option(HDFS 
"Build the PyArrow HDFS integration" ARROW_HDFS) +option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF) +option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython" OFF) +option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF) +set(PYARROW_CXXFLAGS + "" + CACHE STRING "Compiler flags to append when compiling PyArrow C++") + +# enforce module dependencies +if(PYARROW_BUILD_SUBSTRAIT) + set(PYARROW_BUILD_DATASET ON) +endif() +if(PYARROW_BUILD_DATASET) + set(PYARROW_BUILD_ACERO ON) +endif() + +# PyArrow C++ set(PYARROW_CPP_ROOT_DIR pyarrow/src) set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) set(PYARROW_CPP_SRCS @@ -305,6 +345,7 @@ set(PYARROW_CPP_LINK_LIBS "") # Check all the options from Arrow and PyArrow C++ to be in line if(PYARROW_BUILD_DATASET) + message(STATUS "Building PyArrow with Dataset") if(NOT ARROW_DATASET) message(FATAL_ERROR "You must build Arrow C++ with ARROW_DATASET=ON") endif() @@ -317,6 +358,7 @@ if(PYARROW_BUILD_DATASET) endif() if(PYARROW_BUILD_ACERO) + message(STATUS "Building PyArrow with Acero") if(NOT ARROW_ACERO) message(FATAL_ERROR "You must build Arrow C++ with ARROW_ACERO=ON") endif() @@ -328,16 +370,17 @@ if(PYARROW_BUILD_ACERO) endif() endif() -if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION) +if(PYARROW_BUILD_PARQUET) + message(STATUS "Building PyArrow with Parquet") if(NOT ARROW_PARQUET) message(FATAL_ERROR "You must build Arrow C++ with ARROW_PARQUET=ON") endif() find_package(Parquet REQUIRED) -endif() - -if(PYARROW_BUILD_HDFS) - if(NOT ARROW_HDFS) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") +else() + if(PYARROW_BUILD_PARQUET_ENCRYPTION) + message(WARNING "Building PyArrow with Parquet Encryption is requested, but Parquet itself is not enabled. Ignoring the Parquet Encryption setting." 
+ ) + set(PYARROW_BUILD_PARQUET_ENCRYPTION OFF) endif() endif() @@ -400,6 +443,7 @@ endif() set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc) if(PYARROW_BUILD_FLIGHT) + message(STATUS "Building PyArrow with Flight") if(NOT ARROW_FLIGHT) message(FATAL_ERROR "You must build Arrow C++ with ARROW_FLIGHT=ON") endif() @@ -555,23 +599,39 @@ set_source_files_properties(pyarrow/lib.pyx PROPERTIES CYTHON_API TRUE) set(LINK_LIBS arrow_python) if(PYARROW_BUILD_AZURE) + message(STATUS "Building PyArrow with Azure") + if(NOT ARROW_AZURE) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_AZURE=ON") + endif() list(APPEND CYTHON_EXTENSIONS _azurefs) endif() if(PYARROW_BUILD_GCS) + message(STATUS "Building PyArrow with GCS") + if(NOT ARROW_GCS) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_GCS=ON") + endif() list(APPEND CYTHON_EXTENSIONS _gcsfs) endif() if(PYARROW_BUILD_S3) + message(STATUS "Building PyArrow with S3") + if(NOT ARROW_S3) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_S3=ON") + endif() list(APPEND CYTHON_EXTENSIONS _s3fs) endif() if(PYARROW_BUILD_HDFS) + message(STATUS "Building PyArrow with HDFS") + if(NOT ARROW_HDFS) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") + endif() list(APPEND CYTHON_EXTENSIONS _hdfs) endif() if(PYARROW_BUILD_CUDA) - # Arrow CUDA + message(STATUS "Building PyArrow with CUDA") if(NOT ARROW_CUDA) message(FATAL_ERROR "You must build Arrow C++ with ARROW_CUDA=ON") endif() @@ -646,8 +706,9 @@ if(PYARROW_BUILD_PARQUET) endif() endif() +# ORC if(PYARROW_BUILD_ORC) - # ORC + message(STATUS "Building PyArrow with ORC") if(NOT ARROW_ORC) message(FATAL_ERROR "You must build Arrow C++ with ARROW_ORC=ON") endif() @@ -679,6 +740,7 @@ endif() # Substrait if(PYARROW_BUILD_SUBSTRAIT) + message(STATUS "Building PyArrow with Substrait") if(NOT ARROW_SUBSTRAIT) message(FATAL_ERROR "You must build Arrow C++ with ARROW_SUBSTRAIT=ON") endif() @@ -696,6 +758,7 @@ endif() # Gandiva if(PYARROW_BUILD_GANDIVA) + message(STATUS "Building PyArrow with Gandiva") if(NOT ARROW_GANDIVA) message(FATAL_ERROR "You must build Arrow C++ with ARROW_GANDIVA=ON") endif() diff --git a/python/examples/minimal_build/Dockerfile.ubuntu b/python/examples/minimal_build/Dockerfile.ubuntu index ebea4b045e592..07cd69c082461 100644 --- a/python/examples/minimal_build/Dockerfile.ubuntu +++ b/python/examples/minimal_build/Dockerfile.ubuntu @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -FROM ubuntu:focal +FROM ubuntu:jammy ENV DEBIAN_FRONTEND=noninteractive @@ -32,6 +32,7 @@ RUN apt-get update -y -q && \ python3-dev \ python3-pip \ python3-venv \ + tzdata \ && \ apt-get clean && rm -rf /var/lib/apt/lists* diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh index 72c3a5f9ea2cd..e90c800ae2eb1 100755 --- a/python/examples/minimal_build/build_conda.sh +++ b/python/examples/minimal_build/build_conda.sh @@ -97,9 +97,8 @@ export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH} export PYARROW_BUILD_TYPE=Debug export PYARROW_CMAKE_GENERATOR=Ninja -# You can run either "develop" or "build_ext --inplace". Your pick - -# python setup.py build_ext --inplace -python setup.py develop +# Use the same command that we use on python_build.sh +python -m pip install --no-deps --no-build-isolation -vv . 
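(Aside on the python/CMakeLists.txt change above, before the build scripts continue: the new define_option() macro resolves each PYARROW_<NAME> component in three steps. A rough Python paraphrase of that precedence follows; it is only a sketch, and CMake's truthiness rules are approximated.)

import os

def resolve_component(name, arrow_cpp_value, cache_value="AUTO"):
    # 1. An explicit -DPYARROW_<NAME>=ON/OFF cache entry wins outright.
    if cache_value != "AUTO":
        return str(cache_value).upper() in ("1", "ON", "TRUE", "YES", "Y")
    # 2. Otherwise a PYARROW_WITH_<NAME> environment variable, if set, decides.
    env = os.environ.get(f"PYARROW_WITH_{name}")
    if env is not None:
        return env.upper() not in ("", "0", "OFF", "FALSE", "NO", "N")
    # 3. Otherwise fall back to whatever the detected Arrow C++ build enabled.
    return bool(arrow_cpp_value)

# e.g. PYARROW_WITH_DATASET=1 forces the Dataset bindings on even when the
# detected Arrow C++ lacks ARROW_DATASET; the later
# "You must build Arrow C++ with ARROW_DATASET=ON" check then fails loudly.
build_dataset = resolve_component("DATASET", arrow_cpp_value=True)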
+popd -py.test pyarrow +pytest -vv -r s ${PYTEST_ARGS} --pyargs pyarrow diff --git a/python/examples/minimal_build/build_venv.sh b/python/examples/minimal_build/build_venv.sh index 3bd641d0e72c9..f462c4e9b9d0a 100755 --- a/python/examples/minimal_build/build_venv.sh +++ b/python/examples/minimal_build/build_venv.sh @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -set -e +set -ex #---------------------------------------------------------------------- # Change this to whatever makes sense for your system @@ -35,6 +35,7 @@ source $WORKDIR/venv/bin/activate git config --global --add safe.directory $ARROW_ROOT pip install -r $ARROW_ROOT/python/requirements-build.txt +pip install wheel #---------------------------------------------------------------------- # Build C++ library @@ -68,11 +69,11 @@ export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH} export PYARROW_BUILD_TYPE=Debug export PYARROW_CMAKE_GENERATOR=Ninja -# You can run either "develop" or "build_ext --inplace". Your pick +# Use the same command that we use on python_build.sh +python -m pip install --no-deps --no-build-isolation -vv . -# python setup.py build_ext --inplace -python setup.py develop +popd pip install -r $ARROW_ROOT/python/requirements-test.txt -py.test pyarrow +pytest -vv -r s ${PYTEST_ARGS} --pyargs pyarrow diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 936f4736977c8..e52e0d242bee5 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -236,6 +236,9 @@ def print_entry(label, value): RunEndEncodedScalar, ExtensionScalar) # Buffers, allocation +from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager, + default_cpu_memory_manager) + from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer, Codec, compress, decompress, allocate_buffer) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index a267d53599436..44a3d5e740701 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -2035,6 +2035,26 @@ class PairwiseOptions(_PairwiseOptions): self._set_options(period) +cdef class _ListFlattenOptions(FunctionOptions): + def _set_options(self, recursive): + self.wrapped.reset(new CListFlattenOptions(recursive)) + + +class ListFlattenOptions(_ListFlattenOptions): + """ + Options for `list_flatten` function + + Parameters + ---------- + recursive : bool, default False + When True, the list array is flattened recursively until an array + of non-list values is formed. 
+ """ + + def __init__(self, recursive=False): + self._set_options(recursive) + + cdef class _ArraySortOptions(FunctionOptions): def _set_options(self, order, null_placement): self.wrapped.reset(new CArraySortOptions( diff --git a/python/pyarrow/_dataset_parquet.pxd b/python/pyarrow/_dataset_parquet.pxd index d5bc172d324d5..0a3a2ff526ea4 100644 --- a/python/pyarrow/_dataset_parquet.pxd +++ b/python/pyarrow/_dataset_parquet.pxd @@ -29,6 +29,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): cdef: CParquetFragmentScanOptions* parquet_options object _parquet_decryption_config + object _decryption_properties cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp) cdef CReaderProperties* reader_properties(self) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index a55e889ba8246..4942336a12666 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -56,7 +56,7 @@ from pyarrow._parquet cimport ( try: from pyarrow._dataset_parquet_encryption import ( - set_encryption_config, set_decryption_config + set_encryption_config, set_decryption_config, set_decryption_properties ) parquet_encryption_enabled = True except ImportError: @@ -127,8 +127,7 @@ cdef class ParquetFileFormat(FileFormat): 'instance of ParquetReadOptions') if default_fragment_scan_options is None: - default_fragment_scan_options = ParquetFragmentScanOptions( - **scan_args) + default_fragment_scan_options = ParquetFragmentScanOptions(**scan_args) elif isinstance(default_fragment_scan_options, dict): default_fragment_scan_options = ParquetFragmentScanOptions( **default_fragment_scan_options) @@ -715,6 +714,9 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): decryption_config : pyarrow.dataset.ParquetDecryptionConfig, default None If not None, use the provided ParquetDecryptionConfig to decrypt the Parquet file. + decryption_properties : pyarrow.parquet.FileDecryptionProperties, default None + If not None, use the provided FileDecryptionProperties to decrypt encrypted + Parquet file. page_checksum_verification : bool, default False If True, verify the page checksum for each page read from the file. """ @@ -729,6 +731,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): thrift_string_size_limit=None, thrift_container_size_limit=None, decryption_config=None, + decryption_properties=None, bint page_checksum_verification=False): self.init(shared_ptr[CFragmentScanOptions]( new CParquetFragmentScanOptions())) @@ -743,6 +746,8 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): self.thrift_container_size_limit = thrift_container_size_limit if decryption_config is not None: self.parquet_decryption_config = decryption_config + if decryption_properties is not None: + self.decryption_properties = decryption_properties self.page_checksum_verification = page_checksum_verification cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp): @@ -812,6 +817,25 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): raise ValueError("size must be larger than zero") self.reader_properties().set_thrift_container_size_limit(size) + @property + def decryption_properties(self): + if not parquet_encryption_enabled: + raise NotImplementedError( + "Unable to access encryption features. " + "Encryption is not enabled in your installation of pyarrow." 
+ ) + return self._decryption_properties + + @decryption_properties.setter + def decryption_properties(self, config): + if not parquet_encryption_enabled: + raise NotImplementedError( + "Encryption is not enabled in your installation of pyarrow, but " + "decryption_properties were provided." + ) + set_decryption_properties(self, config) + self._decryption_properties = config + @property def parquet_decryption_config(self): if not parquet_encryption_enabled: diff --git a/python/pyarrow/_dataset_parquet_encryption.pyx b/python/pyarrow/_dataset_parquet_encryption.pyx index 11a7174eb3c9d..c8f5e5b01bf81 100644 --- a/python/pyarrow/_dataset_parquet_encryption.pyx +++ b/python/pyarrow/_dataset_parquet_encryption.pyx @@ -162,6 +162,14 @@ def set_encryption_config( opts.parquet_options.parquet_encryption_config = c_config +def set_decryption_properties( + ParquetFragmentScanOptions opts not None, + FileDecryptionProperties config not None +): + cdef CReaderProperties* reader_props = opts.reader_properties() + reader_props.file_decryption_properties(config.unwrap()) + + def set_decryption_config( ParquetFragmentScanOptions opts not None, ParquetDecryptionConfig config not None diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx index 0e635b2c8a28a..dbfb6ed114553 100644 --- a/python/pyarrow/_fs.pyx +++ b/python/pyarrow/_fs.pyx @@ -18,6 +18,7 @@ # cython: language_level = 3 from cpython.datetime cimport datetime, PyDateTime_DateTime +from cython cimport binding from pyarrow.includes.common cimport * from pyarrow.includes.libarrow_python cimport PyDateTime_to_TimePoint @@ -421,6 +422,7 @@ cdef class FileSystem(_Weakrefable): "SubTreeFileSystem") @staticmethod + @binding(True) # Required for cython < 3 def _from_uri(uri): fs, _path = FileSystem.from_uri(uri) return fs diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index ae4094d8b4b5f..1bfa505c54470 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -554,6 +554,7 @@ cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil: CStatus WriteTable(const CTable& table, int64_t chunk_size) CStatus NewRowGroup(int64_t chunk_size) CStatus Close() + CStatus AddKeyValueMetadata(const shared_ptr[const CKeyValueMetadata]& key_value_metadata) const shared_ptr[CFileMetaData] metadata() const diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 7bc68a288aa78..414f0cef4e52b 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -29,9 +29,10 @@ from pyarrow.includes.libarrow_python cimport * from pyarrow.lib cimport (_Weakrefable, Buffer, Schema, check_status, MemoryPool, maybe_unbox_memory_pool, - Table, NativeFile, + Table, KeyValueMetadata, pyarrow_wrap_chunked_array, pyarrow_wrap_schema, + pyarrow_unwrap_metadata, pyarrow_unwrap_schema, pyarrow_wrap_table, pyarrow_wrap_batch, @@ -705,6 +706,22 @@ cdef class SortingColumn: """Whether null values appear before valid values (bool).""" return self.nulls_first + def to_dict(self): + """ + Get dictionary representation of the SortingColumn. + + Returns + ------- + dict + Dictionary with a key for each attribute of this class. 
+        """
+        d = dict(
+            column_index=self.column_index,
+            descending=self.descending,
+            nulls_first=self.nulls_first
+        )
+        return d
+
 
 cdef class RowGroupMetaData(_Weakrefable):
     """Metadata for a single row group."""
@@ -2190,6 +2207,15 @@ cdef class ParquetWriter(_Weakrefable):
             check_status(self.writer.get()
                          .WriteTable(deref(ctable), c_row_group_size))
 
+    def add_key_value_metadata(self, key_value_metadata):
+        cdef:
+            shared_ptr[const CKeyValueMetadata] c_metadata
+
+        c_metadata = pyarrow_unwrap_metadata(KeyValueMetadata(key_value_metadata))
+        with nogil:
+            check_status(self.writer.get()
+                         .AddKeyValueMetadata(c_metadata))
+
     @property
     def metadata(self):
         cdef:
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index f5bab99a49f7a..ba6603322838d 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -185,7 +185,7 @@ cdef class S3FileSystem(FileSystem):
     session_token : str, default None
         AWS Session Token. An optional session token, required if access_key
         and secret_key are temporary credentials from STS.
-    anonymous : boolean, default False
+    anonymous : bool, default False
         Whether to connect anonymously if access_key and secret_key are None.
         If true, will not attempt to look up credentials using standard AWS
         configuration methods.
@@ -217,7 +217,7 @@ cdef class S3FileSystem(FileSystem):
         S3 connection transport scheme.
     endpoint_override : str, default None
         Override region with a connect string such as "localhost:9000"
-    background_writes : boolean, default True
+    background_writes : bool, default True
         Whether file writes will be issued in the background, without
         blocking.
     default_metadata : mapping or pyarrow.KeyValueMetadata, default None
@@ -237,11 +237,20 @@ cdef class S3FileSystem(FileSystem):
         'port': 8020, 'username': 'username', 'password': 'password'})
     allow_bucket_creation : bool, default False
-        Whether to allow CreateDir at the bucket-level. This option may also be
+        Whether to allow directory creation at the bucket-level. This option may also be
         passed in a URI query parameter.
     allow_bucket_deletion : bool, default False
-        Whether to allow DeleteDir at the bucket-level. This option may also be
+        Whether to allow directory deletion at the bucket-level. This option may also be
         passed in a URI query parameter.
+    check_directory_existence_before_creation : bool, default False
+        Whether to check for directory existence before creating it.
+        If False, creating a directory does not check whether it already
+        exists; it is cheaper to attempt the creation and catch the error
+        than to issue two dependent I/O calls.
+        If True, a directory is only created when necessary, at the cost of
+        extra I/O calls. This can be useful for key/value cloud storage with
+        a hard rate limit on the number of object mutation operations, or for
+        scenarios where the directories already exist and you lack creation access.
     retry_strategy : S3RetryStrategy, default AwsStandardS3RetryStrategy(max_attempts=3)
         The retry strategy to use with S3; fail after max_attempts. Available
         strategies are AwsStandardS3RetryStrategy, AwsDefaultS3RetryStrategy.
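(A hedged usage sketch for the new S3 option documented above; the region, bucket, and path are placeholders, not taken from this diff.)

import pyarrow.fs as fs

# Probe for existing "directories" before creating them, trading extra
# lookup calls for fewer object-mutation calls. Useful against stores that
# rate-limit mutations or deny creation on pre-existing prefixes.
s3 = fs.S3FileSystem(
    region="us-east-2",  # placeholder region
    check_directory_existence_before_creation=True,
)
s3.create_dir("example-bucket/nested/prefix")  # placeholder bucket/path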
@@ -273,6 +282,7 @@ cdef class S3FileSystem(FileSystem): role_arn=None, session_name=None, external_id=None, load_frequency=900, proxy_options=None, allow_bucket_creation=False, allow_bucket_deletion=False, + check_directory_existence_before_creation=False, retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy( max_attempts=3), force_virtual_addressing=False): @@ -387,6 +397,7 @@ cdef class S3FileSystem(FileSystem): options.value().allow_bucket_creation = allow_bucket_creation options.value().allow_bucket_deletion = allow_bucket_deletion + options.value().check_directory_existence_before_creation = check_directory_existence_before_creation options.value().force_virtual_addressing = force_virtual_addressing if isinstance(retry_strategy, AwsStandardS3RetryStrategy): @@ -447,6 +458,7 @@ cdef class S3FileSystem(FileSystem): background_writes=opts.background_writes, allow_bucket_creation=opts.allow_bucket_creation, allow_bucket_deletion=opts.allow_bucket_deletion, + check_directory_existence_before_creation=opts.check_directory_existence_before_creation, default_metadata=pyarrow_wrap_metadata(opts.default_metadata), proxy_options={'scheme': frombytes(opts.proxy_options.scheme), 'host': frombytes(opts.proxy_options.host), diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 60fc09ea861b6..3c26e85887466 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2141,22 +2141,99 @@ cdef class Decimal256Array(FixedSizeBinaryArray): cdef class BaseListArray(Array): - def flatten(self): + def flatten(self, recursive=False): """ - Unnest this ListArray/LargeListArray by one level. - - The returned Array is logically a concatenation of all the sub-lists - in this Array. + Unnest this [Large]ListArray/[Large]ListViewArray/FixedSizeListArray + according to 'recursive'. Note that this method is different from ``self.values`` in that it takes care of the slicing offset as well as null elements backed by non-empty sub-lists. + Parameters + ---------- + recursive : bool, default False, optional + When True, flatten this logical list-array recursively until an + array of non-list values is formed. + + When False, flatten only the top level. + Returns ------- result : Array + + Examples + -------- + + Basic logical list-array's flatten + >>> import pyarrow as pa + >>> values = [1, 2, 3, 4] + >>> offsets = [2, 1, 0] + >>> sizes = [2, 2, 2] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 3, + 4 + ], + [ + 2, + 3 + ], + [ + 1, + 2 + ] + ] + >>> array.flatten() + + [ + 3, + 4, + 2, + 3, + 1, + 2 + ] + + When recursive=True, nested list arrays are flattened recursively + until an array of non-list values is formed. + + >>> array = pa.array([ + ... None, + ... [ + ... [1, None, 2], + ... None, + ... [3, 4] + ... ], + ... [], + ... [ + ... [], + ... [5, 6], + ... None + ... ], + ... [ + ... [7, 8] + ... ] + ... ], type=pa.list_(pa.list_(pa.int64()))) + >>> array.flatten(True) + + [ + 1, + null, + 2, + 3, + 4, + 5, + 6, + 7, + 8 + ] """ - return _pc().list_flatten(self) + options = _pc().ListFlattenOptions(recursive) + return _pc().list_flatten(self, options=options) def value_parent_indices(self): """ @@ -2527,7 +2604,7 @@ cdef class LargeListArray(BaseListArray): return pyarrow_wrap_array(( self.ap).offsets()) -cdef class ListViewArray(Array): +cdef class ListViewArray(BaseListArray): """ Concrete class for Arrow arrays of a list view data type. 
""" @@ -2747,69 +2824,8 @@ cdef class ListViewArray(Array): """ return pyarrow_wrap_array(( self.ap).sizes()) - def flatten(self, memory_pool=None): - """ - Unnest this ListViewArray by one level. - - The returned Array is logically a concatenation of all the sub-lists - in this Array. - - Note that this method is different from ``self.values`` in that - it takes care of the slicing offset as well as null elements backed - by non-empty sub-lists. - - Parameters - ---------- - memory_pool : MemoryPool, optional - - Returns - ------- - result : Array - Examples - -------- - - >>> import pyarrow as pa - >>> values = [1, 2, 3, 4] - >>> offsets = [2, 1, 0] - >>> sizes = [2, 2, 2] - >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) - >>> array - - [ - [ - 3, - 4 - ], - [ - 2, - 3 - ], - [ - 1, - 2 - ] - ] - >>> array.flatten() - - [ - 3, - 4, - 2, - 3, - 1, - 2 - ] - """ - cdef CMemoryPool* cpool = maybe_unbox_memory_pool(memory_pool) - with nogil: - out = GetResultValue(( self.ap).Flatten(cpool)) - cdef Array result = pyarrow_wrap_array(out) - result.validate() - return result - - -cdef class LargeListViewArray(Array): +cdef class LargeListViewArray(BaseListArray): """ Concrete class for Arrow arrays of a large list view data type. @@ -3037,67 +3053,6 @@ cdef class LargeListViewArray(Array): """ return pyarrow_wrap_array(( self.ap).sizes()) - def flatten(self, memory_pool=None): - """ - Unnest this LargeListViewArray by one level. - - The returned Array is logically a concatenation of all the sub-lists - in this Array. - - Note that this method is different from ``self.values`` in that - it takes care of the slicing offset as well as null elements backed - by non-empty sub-lists. - - Parameters - ---------- - memory_pool : MemoryPool, optional - - Returns - ------- - result : Array - - Examples - -------- - - >>> import pyarrow as pa - >>> values = [1, 2, 3, 4] - >>> offsets = [2, 1, 0] - >>> sizes = [2, 2, 2] - >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) - >>> array - - [ - [ - 3, - 4 - ], - [ - 2, - 3 - ], - [ - 1, - 2 - ] - ] - >>> array.flatten() - - [ - 3, - 4, - 2, - 3, - 1, - 2 - ] - """ - cdef CMemoryPool* cpool = maybe_unbox_memory_pool(memory_pool) - with nogil: - out = GetResultValue(( self.ap).Flatten(cpool)) - cdef Array result = pyarrow_wrap_array(out) - result.validate() - return result - cdef class MapArray(ListArray): """ @@ -3105,7 +3060,7 @@ cdef class MapArray(ListArray): """ @staticmethod - def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None): + def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None, mask=None): """ Construct MapArray from arrays of int32 offsets and key, item arrays. @@ -3117,6 +3072,8 @@ cdef class MapArray(ListArray): type : DataType, optional If not specified, a default MapArray with the keys' and items' type is used. pool : MemoryPool + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). 
Returns ------- @@ -3198,24 +3155,27 @@ cdef class MapArray(ListArray): cdef: Array _offsets, _keys, _items shared_ptr[CArray] out + shared_ptr[CBuffer] c_mask cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool) _offsets = asarray(offsets, type='int32') _keys = asarray(keys) _items = asarray(items) + c_mask = c_mask_inverted_from_obj(mask, pool) + if type is not None: with nogil: out = GetResultValue( CMapArray.FromArraysAndType( type.sp_type, _offsets.sp_array, - _keys.sp_array, _items.sp_array, cpool)) + _keys.sp_array, _items.sp_array, cpool, c_mask)) else: with nogil: out = GetResultValue( CMapArray.FromArrays(_offsets.sp_array, _keys.sp_array, - _items.sp_array, cpool)) + _items.sp_array, cpool, c_mask)) cdef Array result = pyarrow_wrap_array(out) result.validate() return result @@ -3965,12 +3925,11 @@ cdef class StructArray(Array): result : StructArray """ if by is not None: - tosort = self._flattened_field(by) + tosort, sort_keys = self._flattened_field(by), [("", order)] else: - tosort = self + tosort, sort_keys = self, [(field.name, order) for field in self.type] indices = _pc().sort_indices( - tosort, - options=_pc().SortOptions(sort_keys=[("", order)], **kwargs) + tosort, options=_pc().SortOptions(sort_keys=sort_keys, **kwargs) ) return self.take(indices) @@ -4029,7 +3988,7 @@ cdef class RunEndEncodedArray(Array): ------- RunEndEncodedArray """ - logical_length = run_ends[-1] if len(run_ends) > 0 else 0 + logical_length = scalar(run_ends[-1]).as_py() if len(run_ends) > 0 else 0 return RunEndEncodedArray._from_arrays(type, True, logical_length, run_ends, values, 0) diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index 205ab393b8b09..83612f66d21e2 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -44,6 +44,7 @@ IndexOptions, JoinOptions, ListSliceOptions, + ListFlattenOptions, MakeStructOptions, MapLookupOptions, MatchSubstringOptions, diff --git a/python/pyarrow/device.pxi b/python/pyarrow/device.pxi new file mode 100644 index 0000000000000..6e6034752085a --- /dev/null +++ b/python/pyarrow/device.pxi @@ -0,0 +1,162 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
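(The device.pxi file whose license header ends above backs the Device, MemoryManager, DeviceAllocationType, and default_cpu_memory_manager names imported in pyarrow/__init__.py earlier in this diff. A minimal sketch of the resulting user-facing surface; the commented values are what the CPU singleton is expected to report, not verified output.)

import pyarrow as pa

mm = pa.default_cpu_memory_manager()  # singleton tied to the default memory pool
assert mm.is_cpu

dev = mm.device                       # the Device this MemoryManager is tied to
assert dev.is_cpu
assert dev.device_type == pa.DeviceAllocationType.CPU
print(dev.device_id)                  # expected -1: no device id for the plain CPU
print(dev)                            # repr along the lines of <pyarrow.Device: ...>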
+ +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True + + +cpdef enum DeviceAllocationType: + CPU = CDeviceAllocationType_kCPU + CUDA = CDeviceAllocationType_kCUDA + CUDA_HOST = CDeviceAllocationType_kCUDA_HOST + OPENCL = CDeviceAllocationType_kOPENCL + VULKAN = CDeviceAllocationType_kVULKAN + METAL = CDeviceAllocationType_kMETAL + VPI = CDeviceAllocationType_kVPI + ROCM = CDeviceAllocationType_kROCM + ROCM_HOST = CDeviceAllocationType_kROCM_HOST + EXT_DEV = CDeviceAllocationType_kEXT_DEV + CUDA_MANAGED = CDeviceAllocationType_kCUDA_MANAGED + ONEAPI = CDeviceAllocationType_kONEAPI + WEBGPU = CDeviceAllocationType_kWEBGPU + HEXAGON = CDeviceAllocationType_kHEXAGON + + +cdef object _wrap_device_allocation_type(CDeviceAllocationType device_type): + return DeviceAllocationType(<int> device_type) + + +cdef class Device(_Weakrefable): + """ + Abstract interface for hardware devices + + This object represents a device with access to some memory spaces. + When handling a Buffer or raw memory address, it allows deciding in which + context the raw memory address should be interpreted + (e.g. CPU-accessible memory, or embedded memory on some particular GPU). + """ + + def __init__(self): + raise TypeError("Do not call Device's constructor directly, " + "use the device attribute of the MemoryManager instead.") + + cdef void init(self, const shared_ptr[CDevice]& device): + self.device = device + + @staticmethod + cdef wrap(const shared_ptr[CDevice]& device): + cdef Device self = Device.__new__(Device) + self.init(device) + return self + + def __eq__(self, other): + if not isinstance(other, Device): + return False + return self.device.get().Equals(deref((<Device>other).device.get())) + + def __repr__(self): + return "<pyarrow.Device: {}>".format(frombytes(self.device.get().ToString())) + + @property + def type_name(self): + """ + A shorthand for this device's type. + """ + return frombytes(self.device.get().type_name()) + + @property + def device_id(self): + """ + A device ID to identify this device if there are multiple of this type. + + If there is no "device_id" equivalent (such as for the main CPU device on + non-numa systems) returns -1. + """ + return self.device.get().device_id() + + @property + def is_cpu(self): + """ + Whether this device is the main CPU device. + + This shorthand method is very useful when deciding whether a memory address + is CPU-accessible. + """ + return self.device.get().is_cpu() + + @property + def device_type(self): + """ + Return the DeviceAllocationType of this device. + """ + return _wrap_device_allocation_type(self.device.get().device_type()) + + +cdef class MemoryManager(_Weakrefable): + """ + An object that provides memory management primitives. + + A MemoryManager is always tied to a particular Device instance. + It can also have additional parameters (such as a MemoryPool to + allocate CPU memory). + + """ + + def __init__(self): + raise TypeError("Do not call MemoryManager's constructor directly, " + "use pyarrow.default_cpu_memory_manager() instead.") + + cdef void init(self, const shared_ptr[CMemoryManager]& mm): + self.memory_manager = mm + + @staticmethod + cdef wrap(const shared_ptr[CMemoryManager]& mm): + cdef MemoryManager self = MemoryManager.__new__(MemoryManager) + self.init(mm) + return self + + def __repr__(self): + return "<pyarrow.MemoryManager device: {}>".format( + frombytes(self.memory_manager.get().device().get().ToString()) + ) + + @property + def device(self): + """ + The device this MemoryManager is tied to.
+ """ + return Device.wrap(self.memory_manager.get().device()) + + @property + def is_cpu(self): + """ + Whether this MemoryManager is tied to the main CPU device. + + This shorthand method is very useful when deciding whether a memory + address is CPU-accessible. + """ + return self.memory_manager.get().is_cpu() + + +def default_cpu_memory_manager(): + """ + Return the default CPU MemoryManager instance. + + The returned singleton instance uses the default MemoryPool. + """ + return MemoryManager.wrap(c_default_cpu_memory_manager()) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 6dae45ab80b1c..0d63ec6be38d8 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -316,6 +316,38 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CProxyMemoryPool" arrow::ProxyMemoryPool"(CMemoryPool): CProxyMemoryPool(CMemoryPool*) + ctypedef enum CDeviceAllocationType "arrow::DeviceAllocationType": + CDeviceAllocationType_kCPU "arrow::DeviceAllocationType::kCPU" + CDeviceAllocationType_kCUDA "arrow::DeviceAllocationType::kCUDA" + CDeviceAllocationType_kCUDA_HOST "arrow::DeviceAllocationType::kCUDA_HOST" + CDeviceAllocationType_kOPENCL "arrow::DeviceAllocationType::kOPENCL" + CDeviceAllocationType_kVULKAN "arrow::DeviceAllocationType::kVULKAN" + CDeviceAllocationType_kMETAL "arrow::DeviceAllocationType::kMETAL" + CDeviceAllocationType_kVPI "arrow::DeviceAllocationType::kVPI" + CDeviceAllocationType_kROCM "arrow::DeviceAllocationType::kROCM" + CDeviceAllocationType_kROCM_HOST "arrow::DeviceAllocationType::kROCM_HOST" + CDeviceAllocationType_kEXT_DEV "arrow::DeviceAllocationType::kEXT_DEV" + CDeviceAllocationType_kCUDA_MANAGED "arrow::DeviceAllocationType::kCUDA_MANAGED" + CDeviceAllocationType_kONEAPI "arrow::DeviceAllocationType::kONEAPI" + CDeviceAllocationType_kWEBGPU "arrow::DeviceAllocationType::kWEBGPU" + CDeviceAllocationType_kHEXAGON "arrow::DeviceAllocationType::kHEXAGON" + + cdef cppclass CDevice" arrow::Device": + const char* type_name() + c_string ToString() + c_bool Equals(const CDevice& other) + int64_t device_id() + c_bool is_cpu() const + shared_ptr[CMemoryManager] default_memory_manager() + CDeviceAllocationType device_type() + + cdef cppclass CMemoryManager" arrow::MemoryManager": + const shared_ptr[CDevice] device() + c_bool is_cpu() const + + shared_ptr[CMemoryManager] c_default_cpu_memory_manager \ + " arrow::default_cpu_memory_manager"() + cdef cppclass CBuffer" arrow::Buffer": CBuffer(const uint8_t* data, int64_t size) const uint8_t* data() @@ -328,6 +360,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_bool is_mutable() const c_string ToHexString() c_bool Equals(const CBuffer& other) + shared_ptr[CDevice] device() + const shared_ptr[CMemoryManager] memory_manager() + CDeviceAllocationType device_type() CResult[shared_ptr[CBuffer]] SliceBufferSafe( const shared_ptr[CBuffer]& buffer, int64_t offset) @@ -788,7 +823,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: const shared_ptr[CArray]& offsets, const shared_ptr[CArray]& keys, const shared_ptr[CArray]& items, - CMemoryPool* pool) + CMemoryPool* pool, + const shared_ptr[CBuffer] null_bitmap, + ) @staticmethod CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"( @@ -796,7 +833,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: const shared_ptr[CArray]& offsets, const shared_ptr[CArray]& keys, const shared_ptr[CArray]& items, - CMemoryPool* pool) + CMemoryPool* pool, + const 
shared_ptr[CBuffer] null_bitmap, + ) shared_ptr[CArray] keys() shared_ptr[CArray] items() @@ -2589,6 +2628,11 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: CPairwiseOptions(int64_t period) int64_t period + cdef cppclass CListFlattenOptions\ + "arrow::compute::ListFlattenOptions"(CFunctionOptions): + CListFlattenOptions(c_bool recursive) + c_bool recursive + cdef cppclass CArraySortOptions \ "arrow::compute::ArraySortOptions"(CFunctionOptions): CArraySortOptions(CSortOrder, CNullPlacement) @@ -2796,6 +2840,8 @@ cdef extern from "arrow/extension_type.h" namespace "arrow": cdef cppclass CExtensionType" arrow::ExtensionType"(CDataType): c_string extension_name() shared_ptr[CDataType] storage_type() + int byte_width() + int bit_width() @staticmethod shared_ptr[CArray] WrapArray(shared_ptr[CDataType] ext_type, diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd index f1f2985f65394..cc260b80c7779 100644 --- a/python/pyarrow/includes/libarrow_fs.pxd +++ b/python/pyarrow/includes/libarrow_fs.pxd @@ -157,6 +157,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: c_bool background_writes c_bool allow_bucket_creation c_bool allow_bucket_deletion + c_bool check_directory_existence_before_creation c_bool force_virtual_addressing shared_ptr[const CKeyValueMetadata] default_metadata c_string role_arn diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 7890bf4b2dd76..48b7934209c3a 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1327,6 +1327,39 @@ cdef class Buffer(_Weakrefable): """ return self.buffer.get().is_cpu() + @property + def device(self): + """ + The device where the buffer resides. + + Returns + ------- + Device + """ + return Device.wrap(self.buffer.get().device()) + + @property + def memory_manager(self): + """ + The memory manager associated with the buffer. + + Returns + ------- + MemoryManager + """ + return MemoryManager.wrap(self.buffer.get().memory_manager()) + + @property + def device_type(self): + """ + The device type where the buffer resides. + + Returns + ------- + DeviceAllocationType + """ + return _wrap_device_allocation_type(self.buffer.get().device_type()) + @property def parent(self): cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent() @@ -1446,27 +1479,6 @@ cdef class Buffer(_Weakrefable): buffer.strides = self.strides buffer.suboffsets = NULL - def __getsegcount__(self, Py_ssize_t *len_out): - if len_out != NULL: - len_out[0] = self.size - return 1 - - def __getreadbuffer__(self, Py_ssize_t idx, void **p): - if idx != 0: - raise SystemError("accessing nonexistent buffer segment") - if p != NULL: - p[0] = self.buffer.get().data() - return self.size - - def __getwritebuffer__(self, Py_ssize_t idx, void **p): - if not self.buffer.get().is_mutable(): - raise SystemError("trying to write an immutable buffer") - if idx != 0: - raise SystemError("accessing nonexistent buffer segment") - if p != NULL: - p[0] = self.buffer.get().data() - return self.size - cdef class ResizableBuffer(Buffer): """ @@ -2142,21 +2154,21 @@ cdef class CacheOptions(_Weakrefable): Parameters ---------- hole_size_limit : int, default 8KiB - The maximum distance in bytes between two consecutive ranges; beyond + The maximum distance in bytes between two consecutive ranges; beyond this value, ranges are not combined. 
range_size_limit : int, default 32MiB - The maximum size in bytes of a combined range; if combining two - consecutive ranges would produce a range of a size greater than this, + The maximum size in bytes of a combined range; if combining two + consecutive ranges would produce a range of a size greater than this, they are not combined. lazy : bool, default True lazy = false: request all byte ranges when PreBuffer or WillNeed is called. - lazy = True, prefetch_limit = 0: request merged byte ranges only after the reader - needs them. - lazy = True, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the + lazy = True, prefetch_limit = 0: request merged byte ranges only after the reader + needs them. + lazy = True, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the range that is currently being read. prefetch_limit : int, default 0 - The maximum number of ranges to be prefetched. This is only used for - lazy cache to asynchronously read some ranges after reading the target + The maximum number of ranges to be prefetched. This is only used for + lazy cache to asynchronously read some ranges after reading the target range. """ @@ -2227,19 +2239,19 @@ cdef class CacheOptions(_Weakrefable): """ Create suitable CacheOptions based on provided network metrics. - Typically this will be used with object storage solutions like Amazon S3, + Typically this will be used with object storage solutions like Amazon S3, Google Cloud Storage and Azure Blob Storage. Parameters ---------- time_to_first_byte_millis : int - Seek-time or Time-To-First-Byte (TTFB) in milliseconds, also called call - setup latency of a new read request. The value is a positive integer. + Seek-time or Time-To-First-Byte (TTFB) in milliseconds, also called call + setup latency of a new read request. The value is a positive integer. transfer_bandwidth_mib_per_sec : int - Data transfer Bandwidth (BW) in MiB/sec (per connection). The value is a positive + Data transfer Bandwidth (BW) in MiB/sec (per connection). The value is a positive integer. ideal_bandwidth_utilization_frac : float, default 0.9 - Transfer bandwidth utilization fraction (per connection) to maximize the net + Transfer bandwidth utilization fraction (per connection) to maximize the net data load. The value is a positive float less than 1. max_ideal_request_size_mib : int, default 64 The maximum single data request size (in MiB) to maximize the net data load.
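To make the reflowed CacheOptions documentation above concrete, a hedged sketch; the keyword names follow the parameter names documented in this hunk, and the values are arbitrary:

import pyarrow as pa

# Coalesce ranges that are at most 4 KiB apart, never grow a combined range
# beyond 16 MiB, and lazily prefetch up to 2 merged ranges ahead of the read.
opts = pa.CacheOptions(hole_size_limit=4 * 1024,
                       range_size_limit=16 * 1024 * 1024,
                       lazy=True, prefetch_limit=2)

# Alternatively, derive suitable options from measured network metrics:
# TTFB in milliseconds and per-connection bandwidth in MiB/s.
opts = pa.CacheOptions.from_network_metrics(
    time_to_first_byte_millis=100,
    transfer_bandwidth_mib_per_sec=50)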
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index b1187a77c2a6e..1bc639cc8d2ba 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -437,11 +437,11 @@ cdef class LargeListArray(BaseListArray): pass -cdef class ListViewArray(Array): +cdef class ListViewArray(BaseListArray): pass -cdef class LargeListViewArray(Array): +cdef class LargeListViewArray(BaseListArray): pass @@ -524,6 +524,26 @@ cdef class RecordBatch(_Tabular): cdef void init(self, const shared_ptr[CRecordBatch]& table) +cdef class Device(_Weakrefable): + cdef: + shared_ptr[CDevice] device + + cdef void init(self, const shared_ptr[CDevice]& device) + + @staticmethod + cdef wrap(const shared_ptr[CDevice]& device) + + +cdef class MemoryManager(_Weakrefable): + cdef: + shared_ptr[CMemoryManager] memory_manager + + cdef void init(self, const shared_ptr[CMemoryManager]& memory_manager) + + @staticmethod + cdef wrap(const shared_ptr[CMemoryManager]& mm) + + cdef class Buffer(_Weakrefable): cdef: shared_ptr[CBuffer] buffer diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 3245e50f0fe69..904e018ffddcc 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -162,6 +162,9 @@ include "pandas-shim.pxi" # Memory pools and allocation include "memory.pxi" +# Device type and memory manager +include "device.pxi" + # DataType, Field, Schema include "types.pxi" diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 00fa19604e5c3..e246f1263d20d 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -23,6 +23,7 @@ # module bug (ARROW-11983) import concurrent.futures.thread # noqa from copy import deepcopy +import decimal from itertools import zip_longest import json import operator @@ -1027,6 +1028,7 @@ def _is_generated_index_name(name): 'string': np.str_, 'integer': np.int64, 'floating': np.float64, + 'decimal': np.object_, 'empty': np.object_, } @@ -1105,6 +1107,9 @@ def _reconstruct_columns_from_metadata(columns, column_indexes): tz = pa.lib.string_to_tzinfo( column_indexes[0]['metadata']['timezone']) level = pd.to_datetime(level, utc=True).tz_convert(tz) + # GH-41503: if the column index was decimal, restore to decimal + elif pandas_dtype == "decimal": + level = _pandas_api.pd.Index([decimal.Decimal(i) for i in level]) elif level.dtype != dtype: level = level.astype(dtype) # ARROW-9096: if original DataFrame was upcast we keep that diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index 69a1c9d19aae2..eaff79c8b137c 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -797,8 +797,9 @@ def _sanitize_table(table, new_schema, flavor): Specify if the byte_stream_split encoding should be used in general or only for some columns. If both dictionary and byte_stream_split are enabled, then dictionary is preferred. - The byte_stream_split encoding is valid only for floating-point data types - and should be combined with a compression codec. + The byte_stream_split encoding is valid for integer, floating-point + and fixed-size binary data types (including decimals); it should be + combined with a compression codec to achieve size reduction. column_encoding : string or dict, default None Specify the encoding scheme on a per column basis.
Can only be used when ``use_dictionary`` is set to False, and @@ -1107,6 +1108,19 @@ def close(self): if self.file_handle is not None: self.file_handle.close() + def add_key_value_metadata(self, key_value_metadata): + """ + Add key-value metadata to the file. + This will overwrite any existing metadata with the same key. + + Parameters + ---------- + key_value_metadata : dict + Keys and values must be string-like / coercible to bytes. + """ + assert self.is_open + self.writer.add_key_value_metadata(key_value_metadata) + def _get_pandas_index_columns(keyvalues): return (json.loads(keyvalues[b'pandas'].decode('utf8')) @@ -1299,7 +1313,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, f"local file systems, not {type(filesystem)}" ) - # check for single fragment dataset + # check for single fragment dataset or dataset directory single_file = None self._base_dir = None if not isinstance(path_or_paths, list): @@ -1313,8 +1327,6 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, except ValueError: filesystem = LocalFileSystem(use_mmap=memory_map) finfo = filesystem.get_file_info(path_or_paths) - if finfo.is_file: - single_file = path_or_paths if finfo.type == FileType.Directory: self._base_dir = path_or_paths else: @@ -1771,6 +1783,7 @@ def read_table(source, *, columns=None, use_threads=True, ignore_prefixes=ignore_prefixes, pre_buffer=pre_buffer, coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, + decryption_properties=decryption_properties, thrift_string_size_limit=thrift_string_size_limit, thrift_container_size_limit=thrift_container_size_limit, page_checksum_verification=page_checksum_verification, diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index 79da47567bf24..a2a325fde8dbd 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -874,6 +874,10 @@ class PyListConverter : public ListConverter { if (PyArray_NDIM(ndarray) != 1) { return Status::Invalid("Can only convert 1-dimensional array values"); } + if (PyArray_ISBYTESWAPPED(ndarray)) { + // TODO + return Status::NotImplemented("Byte-swapped arrays not supported"); + } const int64_t size = PyArray_SIZE(ndarray); RETURN_NOT_OK(AppendTo(this->list_type_, size)); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index 57bc3c8fc6616..343b602995db6 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -192,7 +192,7 @@ def wrapper(*args, **kwargs): @pytest.fixture(scope='session') def s3_server(s3_connection, tmpdir_factory): - @retry(attempts=5, delay=0.1, backoff=2) + @retry(attempts=5, delay=1, backoff=2) def minio_server_health_check(address): resp = urllib.request.urlopen(f"http://{address}/minio/health/cluster") assert resp.getcode() == 200 diff --git a/python/pyarrow/tests/parquet/test_encryption.py b/python/pyarrow/tests/parquet/test_encryption.py index edb6410d2fa0d..ff388ef506997 100644 --- a/python/pyarrow/tests/parquet/test_encryption.py +++ b/python/pyarrow/tests/parquet/test_encryption.py @@ -65,6 +65,44 @@ def basic_encryption_config(): return basic_encryption_config +def setup_encryption_environment(custom_kms_conf): + """ + Sets up and returns the KMS connection configuration and crypto factory + based on provided KMS configuration parameters. 
+ """ + kms_connection_config = pe.KmsConnectionConfig(custom_kms_conf=custom_kms_conf) + + def kms_factory(kms_connection_configuration): + return InMemoryKmsClient(kms_connection_configuration) + + # Create our CryptoFactory + crypto_factory = pe.CryptoFactory(kms_factory) + + return kms_connection_config, crypto_factory + + +def write_encrypted_file(path, data_table, footer_key_name, col_key_name, + footer_key, col_key, encryption_config): + """ + Writes an encrypted parquet file based on the provided parameters. + """ + # Setup the custom KMS configuration with provided keys + custom_kms_conf = { + footer_key_name: footer_key.decode("UTF-8"), + col_key_name: col_key.decode("UTF-8"), + } + + # Setup encryption environment + kms_connection_config, crypto_factory = setup_encryption_environment( + custom_kms_conf) + + # Write the encrypted parquet file + write_encrypted_parquet(path, data_table, encryption_config, + kms_connection_config, crypto_factory) + + return kms_connection_config, crypto_factory + + def test_encrypted_parquet_write_read(tempdir, data_table): """Write an encrypted parquet, verify it's encrypted, and then read it.""" path = tempdir / PARQUET_NAME @@ -81,20 +119,10 @@ def test_encrypted_parquet_write_read(tempdir, data_table): cache_lifetime=timedelta(minutes=5.0), data_key_length_bits=256) - kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - COL_KEY_NAME: COL_KEY.decode("UTF-8"), - } - ) - - def kms_factory(kms_connection_configuration): - return InMemoryKmsClient(kms_connection_configuration) + kms_connection_config, crypto_factory = write_encrypted_file( + path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY, + encryption_config) - crypto_factory = pe.CryptoFactory(kms_factory) - # Write with encryption properties - write_encrypted_parquet(path, data_table, encryption_config, - kms_connection_config, crypto_factory) verify_file_encrypted(path) # Read with decryption properties @@ -150,36 +178,22 @@ def test_encrypted_parquet_write_read_wrong_key(tempdir, data_table): cache_lifetime=timedelta(minutes=5.0), data_key_length_bits=256) - kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - COL_KEY_NAME: COL_KEY.decode("UTF-8"), - } - ) - - def kms_factory(kms_connection_configuration): - return InMemoryKmsClient(kms_connection_configuration) + write_encrypted_file(path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, + FOOTER_KEY, COL_KEY, encryption_config) - crypto_factory = pe.CryptoFactory(kms_factory) - # Write with encryption properties - write_encrypted_parquet(path, data_table, encryption_config, - kms_connection_config, crypto_factory) verify_file_encrypted(path) - # Read with decryption properties - wrong_kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - # Wrong keys - mixup in names - FOOTER_KEY_NAME: COL_KEY.decode("UTF-8"), - COL_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - } - ) + wrong_kms_connection_config, wrong_crypto_factory = setup_encryption_environment({ + FOOTER_KEY_NAME: COL_KEY.decode("UTF-8"), # Intentionally wrong + COL_KEY_NAME: FOOTER_KEY.decode("UTF-8"), # Intentionally wrong + }) + decryption_config = pe.DecryptionConfiguration( cache_lifetime=timedelta(minutes=5.0)) with pytest.raises(ValueError, match=r"Incorrect master key used"): read_encrypted_parquet( path, decryption_config, wrong_kms_connection_config, - crypto_factory) + wrong_crypto_factory) def 
test_encrypted_parquet_read_no_decryption_config(tempdir, data_table): @@ -219,23 +233,12 @@ def test_encrypted_parquet_write_no_col_key(tempdir, data_table): encryption_config = pe.EncryptionConfiguration( footer_key=FOOTER_KEY_NAME) - kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - COL_KEY_NAME: COL_KEY.decode("UTF-8"), - } - ) - - def kms_factory(kms_connection_configuration): - return InMemoryKmsClient(kms_connection_configuration) - - crypto_factory = pe.CryptoFactory(kms_factory) with pytest.raises(OSError, match="Either column_keys or uniform_encryption " "must be set"): # Write with encryption properties - write_encrypted_parquet(path, data_table, encryption_config, - kms_connection_config, crypto_factory) + write_encrypted_file(path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, + FOOTER_KEY, b"", encryption_config) def test_encrypted_parquet_write_kms_error(tempdir, data_table, @@ -497,24 +500,11 @@ def test_encrypted_parquet_loop(tempdir, data_table, basic_encryption_config): # Encrypt the footer with the footer key, # encrypt column `a` and column `b` with another key, - # keep `c` plaintext - encryption_config = basic_encryption_config + # keep `c` plaintext, defined in basic_encryption_config + kms_connection_config, crypto_factory = write_encrypted_file( + path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY, + basic_encryption_config) - kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - COL_KEY_NAME: COL_KEY.decode("UTF-8"), - } - ) - - def kms_factory(kms_connection_configuration): - return InMemoryKmsClient(kms_connection_configuration) - - crypto_factory = pe.CryptoFactory(kms_factory) - - # Write with encryption properties - write_encrypted_parquet(path, data_table, encryption_config, - kms_connection_config, crypto_factory) verify_file_encrypted(path) decryption_config = pe.DecryptionConfiguration( @@ -537,32 +527,46 @@ def test_read_with_deleted_crypto_factory(tempdir, data_table, basic_encryption_ Test that decryption properties can be used if the crypto factory is no longer alive """ path = tempdir / PARQUET_NAME - encryption_config = basic_encryption_config - kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - COL_KEY_NAME: COL_KEY.decode("UTF-8"), - } - ) - - def kms_factory(kms_connection_configuration): - return InMemoryKmsClient(kms_connection_configuration) - - encryption_crypto_factory = pe.CryptoFactory(kms_factory) - write_encrypted_parquet(path, data_table, encryption_config, - kms_connection_config, encryption_crypto_factory) + kms_connection_config, crypto_factory = write_encrypted_file( + path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY, + basic_encryption_config) verify_file_encrypted(path) - # Use a local function to get decryption properties, so the crypto factory that - # creates the properties will be deleted after it returns. - def get_decryption_properties(): - decryption_crypto_factory = pe.CryptoFactory(kms_factory) - decryption_config = pe.DecryptionConfiguration( - cache_lifetime=timedelta(minutes=5.0)) - return decryption_crypto_factory.file_decryption_properties( - kms_connection_config, decryption_config) + # Create decryption properties and delete the crypto factory that created + # the properties afterwards. 
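+ # (The properties are expected to stand on their own: the factory is
+ # deleted below and the file must still be readable with them.)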
+ decryption_config = pe.DecryptionConfiguration( + cache_lifetime=timedelta(minutes=5.0)) + file_decryption_properties = crypto_factory.file_decryption_properties( + kms_connection_config, decryption_config) + del crypto_factory result = pq.ParquetFile( - path, decryption_properties=get_decryption_properties()) + path, decryption_properties=file_decryption_properties) result_table = result.read(use_threads=True) assert data_table.equals(result_table) + + +def test_encrypted_parquet_read_table(tempdir, data_table, basic_encryption_config): + """Write an encrypted parquet then read it back using read_table.""" + path = tempdir / PARQUET_NAME + + # Write the encrypted parquet file using the utility function + kms_connection_config, crypto_factory = write_encrypted_file( + path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY, + basic_encryption_config) + + decryption_config = pe.DecryptionConfiguration( + cache_lifetime=timedelta(minutes=5.0)) + file_decryption_properties = crypto_factory.file_decryption_properties( + kms_connection_config, decryption_config) + + # Read the encrypted parquet file using read_table + result_table = pq.read_table(path, decryption_properties=file_decryption_properties) + + # Assert that the read table matches the original data + assert data_table.equals(result_table) + + # Read the encrypted parquet folder using read_table + result_table = pq.read_table( + tempdir, decryption_properties=file_decryption_properties) + assert data_table.equals(result_table) diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index bf186bd923c4f..1eb0598b5c58f 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -303,14 +303,18 @@ def test_parquet_write_disable_statistics(tempdir): def test_parquet_sorting_column(): sorting_col = pq.SortingColumn(10) - assert sorting_col.column_index == 10 - assert sorting_col.descending is False - assert sorting_col.nulls_first is False + assert sorting_col.to_dict() == { + 'column_index': 10, + 'descending': False, + 'nulls_first': False + } sorting_col = pq.SortingColumn(0, descending=True, nulls_first=True) - assert sorting_col.column_index == 0 - assert sorting_col.descending is True - assert sorting_col.nulls_first is True + assert sorting_col.to_dict() == { + 'column_index': 0, + 'descending': True, + 'nulls_first': True + } schema = pa.schema([('a', pa.int64()), ('b', pa.int64())]) sorting_cols = ( @@ -381,9 +385,13 @@ def test_parquet_file_sorting_columns(): # Can retrieve sorting columns from metadata metadata = pq.read_metadata(reader) - assert metadata.num_row_groups == 1 assert sorting_columns == metadata.row_group(0).sorting_columns + metadata_dict = metadata.to_dict() + assert metadata_dict.get('num_columns') == 2 + assert metadata_dict.get('num_rows') == 3 + assert metadata_dict.get('num_row_groups') == 1 + def test_field_id_metadata(): # ARROW-7080 diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py index f4ee7529ae87d..bc3714a6232b1 100644 --- a/python/pyarrow/tests/parquet/test_parquet_writer.py +++ b/python/pyarrow/tests/parquet/test_parquet_writer.py @@ -346,3 +346,18 @@ def test_parquet_writer_store_schema(tempdir): meta = pq.read_metadata(path2) assert meta.metadata is None + + +def test_parquet_writer_append_key_value_metadata(tempdir): + table = pa.Table.from_arrays([pa.array([], type='int32')], ['f0']) + path = tempdir / 
'metadata.parquet' + + with pq.ParquetWriter(path, table.schema) as writer: + writer.write_table(table) + writer.add_key_value_metadata({'key1': '1', 'key2': 'x'}) + writer.add_key_value_metadata({'key2': '2', 'key3': '3'}) + reader = pq.ParquetFile(path) + metadata = reader.metadata.metadata + assert metadata[b'key1'] == b'1' + assert metadata[b'key2'] == b'2' + assert metadata[b'key3'] == b'3' diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 156d58326b961..88394c77e429d 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -1013,6 +1013,18 @@ def test_list_array_types_from_arrays_fail(list_array_type, list_type_factory): arr_slice.offsets, arr_slice.values, mask=arr_slice.is_null()) +def test_map_cast(): + # GH-38553 + t = pa.map_(pa.int64(), pa.int64()) + arr = pa.array([{1: 2}], type=t) + result = arr.cast(pa.map_(pa.int32(), pa.int64())) + + t_expected = pa.map_(pa.int32(), pa.int64()) + expected = pa.array([{1: 2}], type=t_expected) + + assert result.equals(expected) + + def test_map_labelled(): # ARROW-13735 t = pa.map_(pa.field("name", "string", nullable=False), "int64") @@ -1079,6 +1091,40 @@ def test_map_from_arrays(): pa.int64() )) + # pass in null bitmap with type + result = pa.MapArray.from_arrays([0, 2, 2, 6], keys, items, pa.map_( + keys.type, + items.type), + mask=pa.array([False, True, False], type=pa.bool_()) + ) + assert result.equals(expected) + + # pass in null bitmap without the type + result = pa.MapArray.from_arrays([0, 2, 2, 6], keys, items, + mask=pa.array([False, True, False], + type=pa.bool_()) + ) + assert result.equals(expected) + + # error if null bitmap and offsets with nulls passed + msg1 = 'Ambiguous to specify both validity map and offsets with nulls' + with pytest.raises(pa.ArrowInvalid, match=msg1): + pa.MapArray.from_arrays(offsets, keys, items, pa.map_( + keys.type, + items.type), + mask=pa.array([False, True, False], type=pa.bool_()) + ) + + # error if null bitmap passed to sliced offset + msg2 = 'Null bitmap with offsets slice not supported.' 
+ offsets = pa.array([0, 2, 2, 6], pa.int32()) + with pytest.raises(pa.ArrowNotImplementedError, match=msg2): + pa.MapArray.from_arrays(offsets.slice(2), keys, items, pa.map_( + keys.type, + items.type), + mask=pa.array([False, True, False], type=pa.bool_()) + ) + # check invalid usage offsets = [0, 1, 3, 5] keys = np.arange(5) @@ -1099,6 +1145,30 @@ def test_map_from_arrays(): with pytest.raises(ValueError): pa.MapArray.from_arrays(offsets, keys_with_null, items) + # Check if offset in offsets > 0 + offsets = pa.array(offsets, pa.int32()) + result = pa.MapArray.from_arrays(offsets.slice(1), keys, items) + expected = pa.MapArray.from_arrays([1, 3, 5], keys, items) + + assert result.equals(expected) + assert result.offset == 1 + assert expected.offset == 0 + + offsets = pa.array([0, 0, 0, 0, 0, 0], pa.int32()) + result = pa.MapArray.from_arrays( + offsets.slice(1), + pa.array([], pa.string()), + pa.array([], pa.string()), + ) + expected = pa.MapArray.from_arrays( + [0, 0, 0, 0, 0], + pa.array([], pa.string()), + pa.array([], pa.string()), + ) + assert result.equals(expected) + assert result.offset == 1 + assert expected.offset == 0 + def test_fixed_size_list_from_arrays(): values = pa.array(range(12), pa.int64()) @@ -2757,6 +2827,7 @@ def test_list_array_flatten(offset_type, list_type_factory): assert arr1.values.equals(arr0) assert arr2.flatten().flatten().equals(arr0) assert arr2.values.values.equals(arr0) + assert arr2.flatten(True).equals(arr0) @pytest.mark.parametrize('list_type', [ @@ -2778,7 +2849,9 @@ def test_list_value_parent_indices(list_type): @pytest.mark.parametrize(('offset_type', 'list_type'), [(pa.int32(), pa.list_(pa.int32())), (pa.int32(), pa.list_(pa.int32(), list_size=2)), - (pa.int64(), pa.large_list(pa.int32()))]) + (pa.int64(), pa.large_list(pa.int32())), + (pa.int32(), pa.list_view(pa.int32())), + (pa.int64(), pa.large_list_view(pa.int32()))]) def test_list_value_lengths(offset_type, list_type): # FixedSizeListArray needs fixed list sizes @@ -2876,6 +2949,8 @@ def test_fixed_size_list_array_flatten(): assert arr0.type.equals(typ0) assert arr1.flatten().equals(arr0) assert arr2.flatten().flatten().equals(arr0) + assert arr2.flatten().equals(arr1) + assert arr2.flatten(True).equals(arr0) def test_fixed_size_list_array_flatten_with_slice(): @@ -3507,6 +3582,14 @@ def test_struct_array_sort(): {"a": 5, "b": "foo"}, ] + sorted_arr = arr.sort() + assert sorted_arr.to_pylist() == [ + {"a": 5, "b": "foo"}, + {"a": 7, "b": "bar"}, + {"a": 7, "b": "car"}, + {"a": 35, "b": "foobar"}, + ] + arr_with_nulls = pa.StructArray.from_arrays([ pa.array([5, 7, 7, 35], type=pa.int64()), pa.array(["foo", "car", "bar", "foobar"]) @@ -3573,12 +3656,23 @@ def check_run_end_encoded_from_arrays_with_type(ree_type=None): check_run_end_encoded(ree_array, run_ends, values, 19, 4, 0) +def check_run_end_encoded_from_typed_arrays(ree_type): + run_ends = [3, 5, 10, 19] + values = [1, 2, 1, 3] + typed_run_ends = pa.array(run_ends, ree_type.run_end_type) + typed_values = pa.array(values, ree_type.value_type) + ree_array = pa.RunEndEncodedArray.from_arrays(typed_run_ends, typed_values) + assert ree_array.type == ree_type + check_run_end_encoded(ree_array, run_ends, values, 19, 4, 0) + + def test_run_end_encoded_from_arrays(): check_run_end_encoded_from_arrays_with_type() for run_end_type in [pa.int16(), pa.int32(), pa.int64()]: for value_type in [pa.uint32(), pa.int32(), pa.uint64(), pa.int64()]: ree_type = pa.run_end_encoded(run_end_type, value_type) 
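+ # Exercise both construction paths for each REE type: plain Python
+ # lists with an explicit type, and already-typed run-ends/values arrays.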
check_run_end_encoded_from_arrays_with_type(ree_type) + check_run_end_encoded_from_typed_arrays(ree_type) def test_run_end_encoded_from_buffers(): @@ -3844,6 +3938,7 @@ def test_list_view_flatten(list_array_type, list_type_factory, offset_type): assert arr2.values.equals(arr1) assert arr2.flatten().flatten().equals(arr0) assert arr2.values.values.equals(arr0) + assert arr2.flatten(True).equals(arr0) # test out of order offsets values = [1, 2, 3, 4] @@ -3879,3 +3974,27 @@ def test_list_view_slice(list_view_type): j = sliced_array.offsets[1].as_py() assert sliced_array[0].as_py() == sliced_array.values[i:j].to_pylist() == [4] + + +@pytest.mark.parametrize('numpy_native_dtype', ['u2', 'i4', 'f8']) +def test_swapped_byte_order_fails(numpy_native_dtype): + # ARROW-39129 + + numpy_swapped_dtype = np.dtype(numpy_native_dtype).newbyteorder() + np_arr = np.arange(10, dtype=numpy_swapped_dtype) + + # Primitive type array, type is inferred from the numpy array + with pytest.raises(pa.ArrowNotImplementedError): + pa.array(np_arr) + + # Primitive type array, type is explicitly provided + with pytest.raises(pa.ArrowNotImplementedError): + pa.array(np_arr, type=pa.float64()) + + # List type array + with pytest.raises(pa.ArrowNotImplementedError): + pa.array([np_arr]) + + # Struct type array + with pytest.raises(pa.ArrowNotImplementedError): + pa.StructArray.from_arrays([np_arr], names=['a']) diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index 5bf41c3c14b6e..45a3db9b66fc5 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -45,7 +45,7 @@ ValueError, match="Cannot import released ArrowArray") assert_stream_released = pytest.raises( - ValueError, match="Cannot import released ArrowArrayStream") + ValueError, match="Cannot import released Arrow Stream") def PyCapsule_IsValid(capsule, name): diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 98cbd920b509b..d7dee1ad05e93 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -152,6 +152,7 @@ def test_option_class_equality(): pc.IndexOptions(pa.scalar(1)), pc.JoinOptions(), pc.ListSliceOptions(0, -1, 1, True), + pc.ListFlattenOptions(recursive=False), pc.MakeStructOptions(["field", "names"], field_nullability=[True, True], field_metadata=[pa.KeyValueMetadata({"a": "1"}), @@ -1344,6 +1345,11 @@ def test_filter_record_batch(): expected = pa.record_batch([pa.array(["a", "e"])], names=["a'"]) assert result.equals(expected) + # GH-38770: mask is chunked array + chunked_mask = pa.chunked_array([[True, False], [None], [False, True]]) + result = batch.filter(chunked_mask) + assert result.equals(expected) + result = batch.filter(mask, null_selection_behavior="emit_null") expected = pa.record_batch([pa.array(["a", None, "e"])], names=["a'"]) assert result.equals(expected) diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index bc1dd8a09a768..9ddb5197e9120 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -1470,7 +1470,7 @@ def signal_from_thread(): pytest.fail("Failed to get an interruption during CSV reading") # Interruption should have arrived timely - assert last_duration <= 1.0 + assert last_duration <= 2.0 e = exc_info.__context__ assert isinstance(e, pa.ArrowCancelled) assert e.signum == signum diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py index 2a631db9fc0fa..0d8b4a152ab9f 
100644 --- a/python/pyarrow/tests/test_dataset_encryption.py +++ b/python/pyarrow/tests/test_dataset_encryption.py @@ -142,6 +142,18 @@ def test_dataset_encryption_decryption(): assert table.equals(dataset.to_table()) + # set decryption properties for parquet fragment scan options + decryption_properties = crypto_factory.file_decryption_properties( + kms_connection_config, decryption_config) + pq_scan_opts = ds.ParquetFragmentScanOptions( + decryption_properties=decryption_properties + ) + + pformat = pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) + dataset = ds.dataset("sample_dataset", format=pformat, filesystem=mockfs) + + assert table.equals(dataset.to_table()) + @pytest.mark.skipif( not encryption_unavailable, reason="Parquet Encryption is currently enabled" diff --git a/python/pyarrow/tests/test_device.py b/python/pyarrow/tests/test_device.py new file mode 100644 index 0000000000000..6bdb015be1a95 --- /dev/null +++ b/python/pyarrow/tests/test_device.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
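+# These tests exercise the new Device/MemoryManager bindings end to end:
+# the default CPU memory manager singleton, device introspection, and the
+# device/memory_manager/device_type properties newly exposed on Buffer.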
+ +import pyarrow as pa + + +def test_device_memory_manager(): + mm = pa.default_cpu_memory_manager() + assert mm.is_cpu + device = mm.device + assert device.is_cpu + assert device.device_id == -1 + assert device.device_type == pa.DeviceAllocationType.CPU + assert device.type_name == "arrow::CPUDevice" + assert device == device + assert repr(device) == "<pyarrow.Device: CPUDevice()>" + assert repr(mm) == "<pyarrow.MemoryManager device: CPUDevice()>" + + +def test_buffer_device(): + arr = pa.array([0, 1, 2]) + buf = arr.buffers()[1] + assert buf.device_type == pa.DeviceAllocationType.CPU + assert isinstance(buf.device, pa.Device) + assert isinstance(buf.memory_manager, pa.MemoryManager) + assert buf.is_cpu + assert buf.device.is_cpu + assert buf.device == pa.default_cpu_memory_manager().device + assert buf.memory_manager.is_cpu diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index fe38bf651baae..9863d96058947 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -251,14 +251,14 @@ def test_ext_type_repr(): assert repr(ty) == "IntegerType(DataType(int64))" -def test_ext_type__lifetime(): +def test_ext_type_lifetime(): ty = UuidType() wr = weakref.ref(ty) del ty assert wr() is None -def test_ext_type__storage_type(): +def test_ext_type_storage_type(): ty = UuidType() assert ty.storage_type == pa.binary(16) assert ty.__class__ is UuidType @@ -267,6 +267,32 @@ def test_ext_type__storage_type(): assert ty.__class__ is ParamExtType +def test_ext_type_byte_width(): + # Test for fixed-size binary types + ty = UuidType() + assert ty.byte_width == 16 + ty = ParamExtType(5) + assert ty.byte_width == 5 + + # Test for non fixed-size binary types + ty = LabelType() + with pytest.raises(ValueError, match="Non-fixed width type"): + _ = ty.byte_width + + +def test_ext_type_bit_width(): + # Test for fixed-size binary types + ty = UuidType() + assert ty.bit_width == 128 + ty = ParamExtType(5) + assert ty.bit_width == 40 + + # Test for non fixed-size binary types + ty = LabelType() + with pytest.raises(ValueError, match="Non-fixed width type"): + _ = ty.bit_width + + def test_ext_type_as_py(): ty = UuidType() expected = uuid4() diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 845f1eccecc72..58380f1652558 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -1226,6 +1226,11 @@ def test_s3_options(pickle_module): assert isinstance(fs, S3FileSystem) assert pickle_module.loads(pickle_module.dumps(fs)) == fs + fs = S3FileSystem(allow_bucket_creation=True, allow_bucket_deletion=True, + check_directory_existence_before_creation=True) + assert isinstance(fs, S3FileSystem) + assert pickle_module.loads(pickle_module.dumps(fs)) == fs + fs = S3FileSystem(request_timeout=0.5, connect_timeout=0.25) assert isinstance(fs, S3FileSystem) assert pickle_module.loads(pickle_module.dumps(fs)) == fs diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index 39dac4eb81dfb..308c37fd0de1e 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -242,6 +242,8 @@ def test_set_timezone_db_path_non_windows(): pa.MemoryPool, pa.LoggingMemoryPool, pa.ProxyMemoryPool, + pa.Device, + pa.MemoryManager, ]) def test_extension_type_constructor_errors(klass): # ARROW-2638: prevent calling extension class constructors directly diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 3678b4e57a9a8..be2c5b14e68b0 100644 ---
a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -221,6 +221,17 @@ def test_column_index_names_with_tz(self): ) _check_pandas_roundtrip(df, preserve_index=True) + def test_column_index_names_with_decimal(self): + # GH-41503: Test valid roundtrip with decimal value in column index + df = pd.DataFrame( + [[decimal.Decimal(5), decimal.Decimal(6)]], + columns=pd.MultiIndex.from_product( + [[decimal.Decimal(1)], [decimal.Decimal(2), decimal.Decimal(3)]] + ), + index=[decimal.Decimal(4)], + ) + _check_pandas_roundtrip(df, preserve_index=True) + def test_range_index_shortcut(self): # ARROW-1639 index_name = 'foo' diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 4f66a6f41672d..f7b6040f510af 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -1331,10 +1331,13 @@ def __init__(self, schema): def __arrow_c_schema__(self): return self.schema.__arrow_c_schema__() - schema = pa.schema([pa.field("field_name", pa.int32())]) + schema = pa.schema([pa.field("field_name", pa.int32())], metadata={"a": "b"}) + assert schema.metadata == {b"a": b"b"} wrapped_schema = Wrapper(schema) assert pa.schema(wrapped_schema) == schema + assert pa.schema(wrapped_schema).metadata == {b"a": b"b"} + assert pa.schema(wrapped_schema, metadata={"a": "c"}).metadata == {b"a": b"c"} def test_field_import_c_schema_interface(): diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 018099ae7e659..5113df36557f4 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -1519,6 +1519,24 @@ cdef class BaseExtensionType(DataType): """ return pyarrow_wrap_data_type(self.ext_type.storage_type()) + @property + def byte_width(self): + """ + The byte width of the extension type. + """ + if self.ext_type.byte_width() == -1: + raise ValueError("Non-fixed width type") + return self.ext_type.byte_width() + + @property + def bit_width(self): + """ + The bit width of the extension type. + """ + if self.ext_type.bit_width() == -1: + raise ValueError("Non-fixed width type") + return self.ext_type.bit_width() + def wrap_array(self, storage): """ Wrap the given storage array as an extension array. @@ -5332,7 +5350,10 @@ def schema(fields, metadata=None): if isinstance(fields, Mapping): fields = fields.items() elif hasattr(fields, "__arrow_c_schema__"): - return Schema._import_from_c_capsule(fields.__arrow_c_schema__()) + result = Schema._import_from_c_capsule(fields.__arrow_c_schema__()) + if metadata is not None: + result = result.with_metadata(metadata) + return result for item in fields: if isinstance(item, tuple): diff --git a/python/pyproject.toml b/python/pyproject.toml index 1588e690a7247..86a90906d02f9 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -24,7 +24,63 @@ requires = [ # continue using oldest-supported-numpy.
"oldest-supported-numpy>=0.14; python_version<'3.9'", "numpy>=1.25; python_version>='3.9'", - "setuptools_scm", - "setuptools >= 40.1.0", - "wheel" + # configuring setuptools_scm in pyproject.toml requires + # versions released after 2022 + "setuptools_scm[toml]>=8", + "setuptools>=64", ] +build-backend = "setuptools.build_meta" + +[project] +name = "pyarrow" +dynamic = ["version"] +requires-python = ">=3.8" +dependencies = [ + "numpy >= 1.16.6" +] +description = "Python library for Apache Arrow" +readme = {file = "README.md", content-type = "text/markdown"} +license = {text = "Apache Software License"} +classifiers = [ + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', +] +maintainers = [ + {name = "Apache Arrow Developers", email = "dev@arrow.apache.org"} +] + +[project.urls] +Homepage = "https://arrow.apache.org/" +Documentation = "https://arrow.apache.org/docs/python" +Repository = "https://github.com/apache/arrow" +Issues = "https://github.com/apache/arrow/issues" + +[project.optional-dependencies] +test = [ + 'pytest', + 'hypothesis', + 'cffi', + 'pytz', + 'pandas' +] + +[tool.setuptools] +zip-safe=false +include-package-data=true + +[tool.setuptools.packages.find] +where = ["."] + +[tool.setuptools.package-data] +pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd"] + +[tool.setuptools_scm] +root = '..' +version_file = 'pyarrow/_generated_version.py' +version_scheme = 'guess-next-dev' +git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' +fallback_version = '17.0.0a0' diff --git a/python/requirements-build.txt b/python/requirements-build.txt index 87dcc148ad161..c150c842a0cc6 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,5 +1,5 @@ cython>=0.29.31 oldest-supported-numpy>=0.14; python_version<'3.9' numpy>=1.25; python_version>='3.9' -setuptools_scm -setuptools>=38.6.0 +setuptools_scm>=8 +setuptools>=64 diff --git a/python/setup.py b/python/setup.py index 6f3dddb29d248..b738b2f77290e 100755 --- a/python/setup.py +++ b/python/setup.py @@ -152,32 +152,20 @@ def initialize_options(self): if not hasattr(sys, 'gettotalrefcount'): self.build_type = 'release' - self.with_azure = strtobool( - os.environ.get('PYARROW_WITH_AZURE', '0')) - self.with_gcs = strtobool( - os.environ.get('PYARROW_WITH_GCS', '0')) - self.with_s3 = strtobool( - os.environ.get('PYARROW_WITH_S3', '0')) - self.with_hdfs = strtobool( - os.environ.get('PYARROW_WITH_HDFS', '0')) - self.with_cuda = strtobool( - os.environ.get('PYARROW_WITH_CUDA', '0')) - self.with_substrait = strtobool( - os.environ.get('PYARROW_WITH_SUBSTRAIT', '0')) - self.with_flight = strtobool( - os.environ.get('PYARROW_WITH_FLIGHT', '0')) - self.with_acero = strtobool( - os.environ.get('PYARROW_WITH_ACERO', '0')) - self.with_dataset = strtobool( - os.environ.get('PYARROW_WITH_DATASET', '0')) - self.with_parquet = strtobool( - os.environ.get('PYARROW_WITH_PARQUET', '0')) - self.with_parquet_encryption = strtobool( - os.environ.get('PYARROW_WITH_PARQUET_ENCRYPTION', '0')) - self.with_orc = strtobool( - os.environ.get('PYARROW_WITH_ORC', '0')) - self.with_gandiva = strtobool( - os.environ.get('PYARROW_WITH_GANDIVA', '0')) + self.with_azure = None + self.with_gcs = None + self.with_s3 = None + self.with_hdfs = None + self.with_cuda = None + self.with_substrait = None + 
self.with_flight = None + self.with_acero = None + self.with_dataset = None + self.with_parquet = None + self.with_parquet_encryption = None + self.with_orc = None + self.with_gandiva = None + self.generate_coverage = strtobool( os.environ.get('PYARROW_GENERATE_COVERAGE', '0')) self.bundle_arrow_cpp = strtobool( @@ -185,15 +173,6 @@ def initialize_options(self): self.bundle_cython_cpp = strtobool( os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0')) - self.with_parquet_encryption = (self.with_parquet_encryption and - self.with_parquet) - - # enforce module dependencies - if self.with_substrait: - self.with_dataset = True - if self.with_dataset: - self.with_acero = True - CYTHON_MODULE_NAMES = [ 'lib', '_fs', @@ -270,23 +249,30 @@ def append_cmake_bool(value, varname): cmake_options.append('-D{0}={1}'.format( varname, 'on' if value else 'off')) + def append_cmake_component(flag, varname): + # only pass this to cmake if the user passed the --with-component + # flag to setup.py build_ext + if flag is not None: + append_cmake_bool(flag, varname) + if self.cmake_generator: cmake_options += ['-G', self.cmake_generator] - append_cmake_bool(self.with_cuda, 'PYARROW_BUILD_CUDA') - append_cmake_bool(self.with_substrait, 'PYARROW_BUILD_SUBSTRAIT') - append_cmake_bool(self.with_flight, 'PYARROW_BUILD_FLIGHT') - append_cmake_bool(self.with_gandiva, 'PYARROW_BUILD_GANDIVA') - append_cmake_bool(self.with_acero, 'PYARROW_BUILD_ACERO') - append_cmake_bool(self.with_dataset, 'PYARROW_BUILD_DATASET') - append_cmake_bool(self.with_orc, 'PYARROW_BUILD_ORC') - append_cmake_bool(self.with_parquet, 'PYARROW_BUILD_PARQUET') - append_cmake_bool(self.with_parquet_encryption, - 'PYARROW_BUILD_PARQUET_ENCRYPTION') - append_cmake_bool(self.with_azure, 'PYARROW_BUILD_AZURE') - append_cmake_bool(self.with_gcs, 'PYARROW_BUILD_GCS') - append_cmake_bool(self.with_s3, 'PYARROW_BUILD_S3') - append_cmake_bool(self.with_hdfs, 'PYARROW_BUILD_HDFS') + append_cmake_component(self.with_cuda, 'PYARROW_CUDA') + append_cmake_component(self.with_substrait, 'PYARROW_SUBSTRAIT') + append_cmake_component(self.with_flight, 'PYARROW_FLIGHT') + append_cmake_component(self.with_gandiva, 'PYARROW_GANDIVA') + append_cmake_component(self.with_acero, 'PYARROW_ACERO') + append_cmake_component(self.with_dataset, 'PYARROW_DATASET') + append_cmake_component(self.with_orc, 'PYARROW_ORC') + append_cmake_component(self.with_parquet, 'PYARROW_PARQUET') + append_cmake_component(self.with_parquet_encryption, + 'PYARROW_PARQUET_ENCRYPTION') + append_cmake_component(self.with_azure, 'PYARROW_AZURE') + append_cmake_component(self.with_gcs, 'PYARROW_GCS') + append_cmake_component(self.with_s3, 'PYARROW_S3') + append_cmake_component(self.with_hdfs, 'PYARROW_HDFS') + append_cmake_bool(self.bundle_arrow_cpp, 'PYARROW_BUNDLE_ARROW_CPP') append_cmake_bool(self.bundle_cython_cpp, @@ -329,54 +315,8 @@ def append_cmake_bool(value, varname): self._found_names = [] for name in self.CYTHON_MODULE_NAMES: built_path = pjoin(install_prefix, name + ext_suffix) - if not os.path.exists(built_path): - print(f'Did not find {built_path}') - if self._failure_permitted(name): - print(f'Cython module {name} failure permitted') - continue - raise RuntimeError('PyArrow C-extension failed to build:', - os.path.abspath(built_path)) - - self._found_names.append(name) - - def _failure_permitted(self, name): - if name == '_parquet' and not self.with_parquet: - return True - if name == '_parquet_encryption' and not self.with_parquet_encryption: - return True - if name == '_orc' and not
self.with_orc: - return True - if name == '_flight' and not self.with_flight: - return True - if name == '_substrait' and not self.with_substrait: - return True - if name == '_azurefs' and not self.with_azure: - return True - if name == '_gcsfs' and not self.with_gcs: - return True - if name == '_s3fs' and not self.with_s3: - return True - if name == '_hdfs' and not self.with_hdfs: - return True - if name == '_dataset' and not self.with_dataset: - return True - if name == '_acero' and not self.with_acero: - return True - if name == '_exec_plan' and not self.with_acero: - return True - if name == '_dataset_orc' and not ( - self.with_orc and self.with_dataset - ): - return True - if name == '_dataset_parquet' and not ( - self.with_parquet and self.with_dataset - ): - return True - if name == '_cuda' and not self.with_cuda: - return True - if name == 'gandiva' and not self.with_gandiva: - return True - return False + if os.path.exists(built_path): + self._found_names.append(name) def _get_build_dir(self): # Get the package directory from build_py @@ -412,61 +352,11 @@ def get_outputs(self): for name in self.get_names()] -# If the event of not running from a git clone (e.g. from a git archive -# or a Python sdist), see if we can set the version number ourselves -default_version = '17.0.0-SNAPSHOT' -if (not os.path.exists('../.git') and - not os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION')): - os.environ['SETUPTOOLS_SCM_PRETEND_VERSION'] = \ - default_version.replace('-SNAPSHOT', 'a0') - - -# See https://github.com/pypa/setuptools_scm#configuration-parameters -scm_version_write_to_prefix = os.environ.get( - 'SETUPTOOLS_SCM_VERSION_WRITE_TO_PREFIX', setup_dir) - - -def parse_git(root, **kwargs): - """ - Parse function for setuptools_scm that ignores tags for non-C++ - subprojects, e.g. apache-arrow-js-XXX tags. 
- """ - from setuptools_scm.git import parse - kwargs['describe_command'] =\ - 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' - return parse(root, **kwargs) - - -def guess_next_dev_version(version): - if version.exact: - return version.format_with('{tag}') - else: - def guess_next_version(tag_version): - return default_version.replace('-SNAPSHOT', '') - return version.format_next_version(guess_next_version) - - -with open('README.md') as f: - long_description = f.read() - - class BinaryDistribution(Distribution): def has_ext_modules(foo): return True -install_requires = ( - 'numpy >= 1.16.6', -) - - -# Only include pytest-runner in setup_requires if we're invoking tests -if {'pytest', 'test', 'ptr'}.intersection(sys.argv): - setup_requires = ['pytest-runner'] -else: - setup_requires = [] - - if strtobool(os.environ.get('PYARROW_INSTALL_TESTS', '1')): packages = find_namespace_packages(include=['pyarrow*']) exclude_package_data = {} @@ -480,11 +370,7 @@ def has_ext_modules(foo): setup( - name='pyarrow', packages=packages, - zip_safe=False, - package_data={'pyarrow': ['*.pxd', '*.pyx', 'includes/*.pxd']}, - include_package_data=True, exclude_package_data=exclude_package_data, distclass=BinaryDistribution, # Dummy extension to trigger build_ext @@ -492,35 +378,4 @@ def has_ext_modules(foo): cmdclass={ 'build_ext': build_ext }, - use_scm_version={ - 'root': os.path.dirname(setup_dir), - 'parse': parse_git, - 'write_to': os.path.join(scm_version_write_to_prefix, - 'pyarrow/_generated_version.py'), - 'version_scheme': guess_next_dev_version - }, - setup_requires=['setuptools_scm', 'cython >= 0.29.31'] + setup_requires, - install_requires=install_requires, - tests_require=['pytest', 'pandas', 'hypothesis'], - python_requires='>=3.8', - description='Python library for Apache Arrow', - long_description=long_description, - long_description_content_type='text/markdown', - classifiers=[ - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', - ], - license='Apache License, Version 2.0', - maintainer='Apache Arrow Developers', - maintainer_email='dev@arrow.apache.org', - test_suite='pyarrow.tests', - url='https://arrow.apache.org/', - project_urls={ - 'Documentation': 'https://arrow.apache.org/docs/python', - 'Source': 'https://github.com/apache/arrow', - }, ) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index eeff8168b361c..bb4470e29037d 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 16.0.0.9000 +Version: 16.1.0.9000 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), @@ -21,7 +21,7 @@ Description: 'Apache' 'Arrow' is a cross-language language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This package provides an interface to the 'Arrow C++' library. 
-Depends: R (>= 3.4)
+Depends: R (>= 4.0)
 License: Apache License (>= 2.0)
 URL: https://github.com/apache/arrow/, https://arrow.apache.org/docs/r/
 BugReports: https://github.com/apache/arrow/issues
diff --git a/r/Makefile b/r/Makefile
index c3267e8cfe45b..785e9e1214d4f 100644
--- a/r/Makefile
+++ b/r/Makefile
@@ -52,11 +52,11 @@ build: doc sync-cpp
 	R CMD build ${args} .
 
 check: build
-	-export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE && export ARROW_R_DEV=$(ARROW_R_DEV) && export _R_CHECK_TESTS_NLINES_=0 && R CMD check --as-cran --run-donttest arrow_$(VERSION).tar.gz
+	-export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE && export ARROW_R_DEV=$(ARROW_R_DEV) && export _R_CHECK_TESTS_NLINES_=0 && R CMD check --as-cran arrow_$(VERSION).tar.gz
 	rm -rf arrow.Rcheck/
 
 release: build
-	-export _R_CHECK_TESTS_NLINES_=0 && R CMD check --as-cran --run-donttest arrow_$(VERSION).tar.gz
+	-export _R_CHECK_TESTS_NLINES_=0 && R CMD check --as-cran arrow_$(VERSION).tar.gz
 	rm -rf arrow.Rcheck/
 
 clean:
diff --git a/r/NEWS.md b/r/NEWS.md
index 4ed9f28a28436..dc89fa266e3ef 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -17,18 +17,28 @@
 under the License.
 -->
 
-# arrow 16.0.0.9000
+# arrow 16.1.0.9000
 
 * R functions that users write that use functions that Arrow supports in dataset queries now can be used in queries too. Previously, only functions that used arithmetic operators worked. For example, `time_hours <- function(mins) mins / 60` worked, but `time_hours_rounded <- function(mins) round(mins / 60)` did not; now both work. These are automatic translations rather than true user-defined functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223)
 * `summarize()` supports more complex expressions, and correctly handles cases where column names are reused in expressions.
+* The `na_matches` argument to the `dplyr::*_join()` functions is now supported. This argument controls whether `NA` values are considered equal when joining. (#41358)
+
+# arrow 16.1.0
 
-# arrow 16.0.0
+## New features
 
-# arrow 15.0.2
+* Streams can now be written to socket connections (#38897)
+* The Arrow R package now can be built with older versions of the Arrow C++ library (back to 13.0.0) (#39738)
 
-# arrow 15.0.1
+## Minor improvements and fixes
+
+* Dataset and table output printing now truncates schemas longer than 20 items (#38916)
+* Fixed pointer conversion to Python for latest reticulate to ensure data can be passed between Arrow and PyArrow (#39969)
+* Check on macOS if we are using GNU libtool and ensure we use macOS libtool instead (#40259)
+* Fix an error where creating a bundled tarball with all dependencies was failing on Windows (@hutch3232, #40232)
 
-# arrow 15.0.0
+
+# arrow 15.0.1
 
 ## New features
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index f6977e626276b..44dfbbcd5c7e7 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -48,10 +48,7 @@ supported_dplyr_methods <- list(
   group_vars = NULL,
   group_by_drop_default = NULL,
   ungroup = NULL,
-  mutate = c(
-    "window functions (e.g.
things that require aggregation within groups)", - "not currently supported" - ), + mutate = NULL, transmute = NULL, arrange = NULL, rename = NULL, @@ -66,12 +63,12 @@ supported_dplyr_methods <- list( compute = NULL, collapse = NULL, distinct = "`.keep_all = TRUE` not supported", - left_join = "the `copy` and `na_matches` arguments are ignored", - right_join = "the `copy` and `na_matches` arguments are ignored", - inner_join = "the `copy` and `na_matches` arguments are ignored", - full_join = "the `copy` and `na_matches` arguments are ignored", - semi_join = "the `copy` and `na_matches` arguments are ignored", - anti_join = "the `copy` and `na_matches` arguments are ignored", + left_join = "the `copy` argument is ignored", + right_join = "the `copy` argument is ignored", + inner_join = "the `copy` argument is ignored", + full_join = "the `copy` argument is ignored", + semi_join = "the `copy` argument is ignored", + anti_join = "the `copy` argument is ignored", count = NULL, tally = NULL, rename_with = NULL, diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 752d3a266b26a..62e2182ffcd52 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -484,8 +484,8 @@ ExecNode_Aggregate <- function(input, options, key_names) { .Call(`_arrow_ExecNode_Aggregate`, input, options, key_names) } -ExecNode_Join <- function(input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right) { - .Call(`_arrow_ExecNode_Join`, input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right) +ExecNode_Join <- function(input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right, na_matches) { + .Call(`_arrow_ExecNode_Join`, input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right, na_matches) } ExecNode_Union <- function(input, right_data) { diff --git a/r/R/dplyr-across.R b/r/R/dplyr-across.R index 0d85764f7fb35..6aeedc18f375e 100644 --- a/r/R/dplyr-across.R +++ b/r/R/dplyr-across.R @@ -34,7 +34,11 @@ expand_across <- function(.data, quos_in, exclude_cols = NULL) { ) if (!all(names(across_call[-1]) %in% c(".cols", ".fns", ".names"))) { - abort("`...` argument to `across()` is deprecated in dplyr and not supported in Arrow") + arrow_not_supported( + "`...` argument to `across()` is deprecated in dplyr and", + body = c(">" = "Convert your call into a function or formula including the arguments"), + call = rlang::caller_call() + ) } if (!is.null(across_call[[".cols"]])) { diff --git a/r/R/dplyr-arrange.R b/r/R/dplyr-arrange.R index f91cd14211e0f..fdc69a708d15d 100644 --- a/r/R/dplyr-arrange.R +++ b/r/R/dplyr-arrange.R @@ -19,39 +19,46 @@ # The following S3 methods are registered on load if dplyr is present arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) { - call <- match.call() - .data <- as_adq(.data) - exprs <- expand_across(.data, quos(...)) + try_arrow_dplyr({ + .data <- as_adq(.data) + exprs <- expand_across(.data, quos(...)) - if (.by_group) { - # when the data is grouped and .by_group is TRUE, order the result by - # the grouping columns first - exprs <- c(quos(!!!dplyr::groups(.data)), exprs) - } - if (length(exprs) == 0) { - # Nothing to do - return(.data) - } - .data <- as_adq(.data) - # find and remove any dplyr::desc() and tidy-eval - # the arrange expressions inside an Arrow data_mask - sorts <- vector("list", 
length(exprs))
-  descs <- logical(0)
-  mask <- arrow_mask(.data)
-  for (i in seq_along(exprs)) {
-    x <- find_and_remove_desc(exprs[[i]])
-    exprs[[i]] <- x[["quos"]]
-    sorts[[i]] <- arrow_eval(exprs[[i]], mask)
-    names(sorts)[i] <- format_expr(exprs[[i]])
-    if (inherits(sorts[[i]], "try-error")) {
-      msg <- paste("Expression", names(sorts)[i], "not supported in Arrow")
-      return(abandon_ship(call, .data, msg))
+    if (.by_group) {
+      # when the data is grouped and .by_group is TRUE, order the result by
+      # the grouping columns first
+      exprs <- c(quos(!!!dplyr::groups(.data)), exprs)
     }
-    descs[i] <- x[["desc"]]
-  }
-  .data$arrange_vars <- c(sorts, .data$arrange_vars)
-  .data$arrange_desc <- c(descs, .data$arrange_desc)
-  .data
+    if (length(exprs) == 0) {
+      # Nothing to do
+      return(.data)
+    }
+    .data <- as_adq(.data)
+    # find and remove any dplyr::desc() and tidy-eval
+    # the arrange expressions inside an Arrow data_mask
+    sorts <- vector("list", length(exprs))
+    descs <- logical(0)
+    mask <- arrow_mask(.data)
+    for (i in seq_along(exprs)) {
+      x <- find_and_remove_desc(exprs[[i]])
+      exprs[[i]] <- x[["quos"]]
+      sorts[[i]] <- arrow_eval(exprs[[i]], mask)
+      names(sorts)[i] <- format_expr(exprs[[i]])
+      if (length(mask$.aggregations)) {
+        # dplyr lets you arrange on e.g. x < mean(x), but we haven't implemented it.
+        # But we could, the same way it works in mutate() via join, if someone asks.
+        # Until then, just error.
+        # TODO: add a test for this
+        arrow_not_supported(
+          .actual_msg = "Expression not supported in arrange() in Arrow",
+          call = exprs[[i]]
+        )
+      }
+      descs[i] <- x[["desc"]]
+    }
+    .data$arrange_vars <- c(sorts, .data$arrange_vars)
+    .data$arrange_desc <- c(descs, .data$arrange_desc)
+    .data
+  })
 }
 
 arrange.Dataset <- arrange.ArrowTabular <- arrange.RecordBatchReader <- arrange.arrow_dplyr_query
@@ -65,10 +72,9 @@ find_and_remove_desc <- function(quosure) {
   expr <- quo_get_expr(quosure)
   descending <- FALSE
   if (length(all.vars(expr)) < 1L) {
-    stop(
-      "Expression in arrange() does not contain any field names: ",
-      deparse(expr),
-      call. = FALSE
+    validation_error(
+      "Expression in arrange() does not contain any field names",
+      call = quosure
     )
   }
   # Use a while loop to remove any number of nested pairs of enclosing
@@ -82,7 +88,10 @@ find_and_remove_desc <- function(quosure) {
   # ensure desc() has only one argument (when an R expression is a function
   # call, length == 2 means it has exactly one argument)
   if (length(expr) > 2) {
-    stop("desc() expects only one argument", call. = FALSE)
+    validation_error(
+      "desc() expects only one argument",
+      call = expr
+    )
   }
   # remove desc() and toggle descending
   expr <- expr[[2]]
diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index c153f47cbafdb..8e6a7f6185366 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -18,10 +18,10 @@
 check_time_locale <- function(locale = Sys.getlocale("LC_TIME")) {
   if (tolower(Sys.info()[["sysname"]]) == "windows" && locale != "C") {
     # MingW C++ std::locale only supports "C" and "POSIX"
-    stop(paste0(
-      "On Windows, time locales other than 'C' are not supported in Arrow.
", - "Consider setting `Sys.setlocale('LC_TIME', 'C')`" - )) + arrow_not_supported( + "On Windows, time locales other than 'C'", + body = c(">" = "Consider setting `Sys.setlocale('LC_TIME', 'C')`") + ) } locale } @@ -56,13 +56,15 @@ duration_from_chunks <- function(chunks) { matched_chunks <- accepted_chunks[pmatch(names(chunks), accepted_chunks, duplicates.ok = TRUE)] if (any(is.na(matched_chunks))) { - abort( - paste0( - "named `difftime` units other than: ", - oxford_paste(accepted_chunks, quote_symbol = "`"), - " not supported in Arrow. \nInvalid `difftime` parts: ", + arrow_not_supported( + paste( + "named `difftime` units other than:", + oxford_paste(accepted_chunks, quote_symbol = "`") + ), + body = c(i = paste( + "Invalid `difftime` parts:", oxford_paste(names(chunks[is.na(matched_chunks)]), quote_symbol = "`") - ) + )) ) } @@ -114,7 +116,6 @@ binding_as_date_character <- function(x, } binding_as_date_numeric <- function(x, origin = "1970-01-01") { - # Arrow does not support direct casting from double to date32(), but for # integer-like values we can go via int32() # TODO: revisit after ARROW-15798 @@ -442,7 +443,7 @@ parse_period_unit <- function(x) { unit <- as.integer(pmatch(str_unit_start, known_units)) - 1L if (any(is.na(unit))) { - abort( + validation_error( sprintf( "Invalid period name: '%s'", str_unit, @@ -484,13 +485,13 @@ parse_period_unit <- function(x) { # more special cases: lubridate imposes sensible maximum # values on the number of seconds, minutes and hours if (unit == 3L && multiple > 60) { - abort("Rounding with second > 60 is not supported") + validation_error("Rounding with second > 60 is not supported") } if (unit == 4L && multiple > 60) { - abort("Rounding with minute > 60 is not supported") + validation_error("Rounding with minute > 60 is not supported") } if (unit == 5L && multiple > 24) { - abort("Rounding with hour > 24 is not supported") + validation_error("Rounding with hour > 24 is not supported") } list(unit = unit, multiple = multiple) diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R index 3aaa29696b8c8..1997d698c0b24 100644 --- a/r/R/dplyr-eval.R +++ b/r/R/dplyr-eval.R @@ -25,30 +25,64 @@ arrow_eval <- function(expr, mask) { add_user_functions_to_mask(expr, mask) # This yields an Expression as long as the `exprs` are implemented in Arrow. - # Otherwise, it returns a try-error + # Otherwise, it raises a classed error, either: + # * arrow_not_supported: the expression is not supported in Arrow; retry with + # regular dplyr may work + # * validation_error: the expression is known to be not valid, so don't + # recommend retrying with regular dplyr tryCatch(eval_tidy(expr, mask), error = function(e) { - # Look for the cases where bad input was given, i.e. this would fail - # in regular dplyr anyway, and let those raise those as errors; - # else, for things not supported in Arrow return a "try-error", - # which we'll handle differently + # Inspect why the expression failed, and add the expr as the `call` + # for better error messages msg <- conditionMessage(e) - if (getOption("arrow.debug", FALSE)) print(msg) - patterns <- .cache$i18ized_error_pattern - if (is.null(patterns)) { - patterns <- i18ize_error_messages() - # Memoize it - .cache$i18ized_error_pattern <- patterns - } - if (grepl(patterns, msg)) { + arrow_debug <- getOption("arrow.debug", FALSE) + if (arrow_debug) print(msg) + + # A few cases: + # 1. Evaluation raised one of our error classes. Add the expr as the call + # and re-raise it. 
+ if (inherits(e, c("validation_error", "arrow_not_supported"))) { + e$call <- expr stop(e) } - out <- structure(msg, class = "try-error", condition = e) - if (grepl("not supported.*Arrow|NotImplemented", msg) || getOption("arrow.debug", FALSE)) { - # One of ours. Mark it so that consumers can handle it differently - class(out) <- c("arrow-try-error", class(out)) + # 2. Error is from assert_that: raise as validation_error + if (inherits(e, "assertError")) { + validation_error(msg, call = expr) + } + + # 3. Check to see if this is a standard R error message (not found etc.). + # Retry with dplyr won't help. + if (grepl(get_standard_error_messages(), msg)) { + # Raise the original error: it's actually helpful here + validation_error(msg, call = expr) + } + # 3b. Check to see if this is from match.arg. Retry with dplyr won't help. + if (is.language(e$call) && identical(as.character(e$call[[1]]), "match.arg")) { + # Raise the original error: it's actually helpful here + validation_error(msg, call = expr) + } + + # 4. Check for NotImplemented error raised from Arrow C++ code. + # Not sure where exactly we may raise this, but if we see it, it means + # that something isn't supported in Arrow. Retry in dplyr may help? + if (grepl("NotImplemented", msg)) { + arrow_not_supported(.actual_msg = msg, call = expr) + } + + + # 5. Otherwise, we're not sure why this errored: it's not an error we raised + # explicitly. We'll assume it's because the function it calls isn't + # supported in arrow, and retry with dplyr may help. + if (arrow_debug) { + arrow_not_supported(.actual_msg = msg, call = expr) + } else { + # Don't show the original error message unless in debug mode because + # it's probably not helpful: like, if you've passed an Expression to a + # regular R function that operates on strings, the way it errors would be + # more confusing than just saying that the expression is not supported + # in arrow. + arrow_not_supported("Expression", call = expr) } - invisible(out) }) } @@ -93,15 +127,12 @@ add_user_functions_to_mask <- function(expr, mask) { invisible() } -handle_arrow_not_supported <- function(err, lab) { - # Look for informative message from the Arrow function version (see above) - if (inherits(err, "arrow-try-error")) { - # Include it if found - paste0("In ", lab, ", ", as.character(err)) - } else { - # Otherwise be opaque (the original error is probably not useful) - paste("Expression", lab, "not supported in Arrow") +get_standard_error_messages <- function() { + if (is.null(.cache$i18ized_error_pattern)) { + # Memoize it + .cache$i18ized_error_pattern <- i18ize_error_messages() } + .cache$i18ized_error_pattern } i18ize_error_messages <- function() { @@ -114,35 +145,107 @@ i18ize_error_messages <- function() { paste(map(out, ~ sub("X_____X", ".*", .)), collapse = "|") } -# Helper to raise a common error -arrow_not_supported <- function(msg) { - # TODO: raise a classed error? - stop(paste(msg, "not supported in Arrow"), call. = FALSE) +#' Helpers to raise classed errors +#' +#' `arrow_not_supported()` and `validation_error()` raise classed errors that +#' allow us to distinguish between things that are not supported in Arrow and +#' things that are just invalid input. Additional wrapping in `arrow_eval()` +#' and `try_arrow_dplyr()` provide more context and suggestions. +#' Importantly, if `arrow_not_supported` is raised, then retrying the same code +#' in regular dplyr in R may work. 
But if `validation_error` is raised, then we
+#' shouldn't recommend retrying with regular dplyr because it will fail there
+#' too.
+#'
+#' Use these in function bindings and in the dplyr methods. Inside of function
+#' bindings, you don't need to provide the `call` argument, as it will be
+#' automatically filled in with the expression that caused the error in
+#' `arrow_eval()`. In dplyr methods, you should provide the `call` argument;
+#' `rlang::caller_call()` often is correct, but you may need to experiment to
+#' find how far up the call stack you need to look.
+#'
+#' You may provide additional information in the `body` argument, a named
+#' character vector. Use `i` for additional information about the error and `>`
+#' to indicate potential solutions or workarounds that don't require pulling the
+#' data into R. If you have an `arrow_not_supported()` error with a `>`
+#' suggestion, when the error is ultimately raised by `try_arrow_dplyr()`,
+#' `Call collect() first to pull data into R` won't be the only suggestion.
+#'
+#' You can still use `match.arg()` and `assert_that()` for simple input
+#' validation inside of the function bindings. `arrow_eval()` will catch their
+#' errors and re-raise them as `validation_error`.
+#'
+#' @param msg The message to show. `arrow_not_supported()` will append
+#' "not supported in Arrow" to this message.
+#' @param .actual_msg If you don't want to append "not supported in Arrow" to
+#' the message, you can provide the full message here.
+#' @param ... Additional arguments to pass to `rlang::abort()`. Useful arguments
+#' include `call` to provide the call or expression that caused the error, and
+#' `body` to provide additional context about the error.
+#' @keywords internal
+arrow_not_supported <- function(msg,
+                                .actual_msg = paste(msg, "not supported in Arrow"),
+                                ...) {
+  abort(.actual_msg, class = "arrow_not_supported", use_cli_format = TRUE, ...)
 }
 
-# Create a data mask for evaluating a dplyr expression
-arrow_mask <- function(.data, aggregation = FALSE) {
-  f_env <- new_environment(.cache$functions)
+#' @rdname arrow_not_supported
+validation_error <- function(msg, ...) {
+  abort(msg, class = "validation_error", use_cli_format = TRUE, ...)
+}
 
-  if (aggregation) {
-    # Add the aggregation functions to the environment, and set the enclosing
-    # environment to the parent frame so that, when called from summarize_eval(),
-    # they can reference and assign into `..aggregations` defined there.
-    pf <- parent.frame()
-    for (f in names(agg_funcs)) {
-      f_env[[f]] <- agg_funcs[[f]]
-      environment(f_env[[f]]) <- pf
-    }
-  } else {
-    # Add functions that need to error hard and clear.
-    # Some R functions will still try to evaluate on an Expression
-    # and return NA with a warning :exploding_head:
-    fail <- function(...)
stop("Not implemented") - for (f in c("mean", "sd")) { - f_env[[f]] <- fail - } +# Wrap the contents of an arrow dplyr verb function in a tryCatch block to +# handle arrow_not_supported errors: +# * If it errors because of arrow_not_supported, abandon ship +# * If it's another error, just stop, retry with regular dplyr won't help +try_arrow_dplyr <- function(expr) { + parent <- caller_env() + # Make sure that the call is available in the parent environment + # so that we can use it in abandon_ship, if needed + evalq(call <- match.call(), parent) + + tryCatch( + eval(expr, parent), + arrow_not_supported = function(e) abandon_ship(e, parent) + ) +} + +# Helper to handle unsupported dplyr features +# * For Table/RecordBatch, we collect() and then call the dplyr method in R +# * For Dataset, we error and recommend collect() +# Requires that `env` contains `.data` +# The Table/RB path also requires `call` to be in `env` (try_arrow_dplyr adds it) +# and that the function being called also exists in the dplyr namespace. +abandon_ship <- function(err, env) { + .data <- get(".data", envir = env) + if (query_on_dataset(.data)) { + # Add a note suggesting `collect()` to the error message. + # If there are other suggestions already there (with the > arrow name), + # collect() isn't the only suggestion, so message differently + msg <- ifelse( + ">" %in% names(err$body), + "Or, call collect() first to pull data into R.", + "Call collect() first to pull data into R." + ) + err$body <- c(err$body, ">" = msg) + stop(err) } + # Else, warn, collect(), and run in regular dplyr + call <- get("call", envir = env) + rlang::warn( + message = paste0("In ", format_expr(err$call), ": "), + body = c("i" = conditionMessage(err), ">" = "Pulling data into R") + ) + call$.data <- dplyr::collect(.data) + dplyr_fun_name <- sub("^(.*?)\\..*", "\\1", as.character(call[[1]])) + call[[1]] <- get(dplyr_fun_name, envir = asNamespace("dplyr")) + eval(call, env) +} + +# Create a data mask for evaluating a dplyr expression +arrow_mask <- function(.data) { + f_env <- new_environment(.cache$functions) + # Assign the schema to the expressions schema <- .data$.data$schema walk(.data$selected_columns, ~ (.$schema <- schema)) @@ -156,6 +259,8 @@ arrow_mask <- function(.data, aggregation = FALSE) { # TODO: figure out what rlang::as_data_pronoun does/why we should use it # (because if we do we get `Error: Can't modify the data pronoun` in mutate()) out$.data <- .data$selected_columns + # Add the aggregations list to collect any that get pulled out when evaluating + out$.aggregations <- empty_named_list() out } diff --git a/r/R/dplyr-filter.R b/r/R/dplyr-filter.R index d85fa16af2e71..36219e411e56d 100644 --- a/r/R/dplyr-filter.R +++ b/r/R/dplyr-filter.R @@ -19,69 +19,45 @@ # The following S3 methods are registered on load if dplyr is present filter.arrow_dplyr_query <- function(.data, ..., .by = NULL, .preserve = FALSE) { - # TODO something with the .preserve argument - out <- as_adq(.data) + try_arrow_dplyr({ + # TODO something with the .preserve argument + out <- as_adq(.data) - by <- compute_by({{ .by }}, out, by_arg = ".by", data_arg = ".data") + by <- compute_by({{ .by }}, out, by_arg = ".by", data_arg = ".data") - if (by$from_by) { - out$group_by_vars <- by$names - } + if (by$from_by) { + out$group_by_vars <- by$names + } - expanded_filters <- expand_across(out, quos(...)) - if (length(expanded_filters) == 0) { - # Nothing to do - return(as_adq(.data)) - } + expanded_filters <- expand_across(out, quos(...)) + if 
(length(expanded_filters) == 0) { + # Nothing to do + return(as_adq(.data)) + } - # tidy-eval the filter expressions inside an Arrow data_mask - filters <- lapply(expanded_filters, arrow_eval, arrow_mask(out)) - bad_filters <- map_lgl(filters, ~ inherits(., "try-error")) - if (any(bad_filters)) { - # This is similar to abandon_ship() except that the filter eval is - # vectorized, and we apply filters that _did_ work before abandoning ship - # with the rest - expr_labs <- map_chr(expanded_filters[bad_filters], format_expr) - if (query_on_dataset(out)) { - # Abort. We don't want to auto-collect if this is a Dataset because that - # could blow up, too big. - stop( - "Filter expression not supported for Arrow Datasets: ", - oxford_paste(expr_labs, quote = FALSE), - "\nCall collect() first to pull data into R.", - call. = FALSE - ) - } else { - arrow_errors <- map2_chr( - filters[bad_filters], expr_labs, - handle_arrow_not_supported - ) - if (length(arrow_errors) == 1) { - msg <- paste0(arrow_errors, "; ") - } else { - msg <- paste0("* ", arrow_errors, "\n", collapse = "") + # tidy-eval the filter expressions inside an Arrow data_mask + mask <- arrow_mask(out) + for (expr in expanded_filters) { + filt <- arrow_eval(expr, mask) + if (length(mask$.aggregations)) { + # dplyr lets you filter on e.g. x < mean(x), but we haven't implemented it. + # But we could, the same way it works in mutate() via join, if someone asks. + # Until then, just error. + # TODO: add a test for this + arrow_not_supported( + .actual_msg = "Expression not supported in filter() in Arrow", + call = expr + ) } - warning( - msg, "pulling data into R", - immediate. = TRUE, - call. = FALSE - ) - # Set any valid filters first, then collect and then apply the invalid ones in R - out <- dplyr::collect(set_filters(out, filters[!bad_filters])) - if (by$from_by) { - out <- dplyr::ungroup(out) - } - return(dplyr::filter(out, !!!expanded_filters[bad_filters], .by = {{ .by }})) + out <- set_filters(out, filt) } - } - - out <- set_filters(out, filters) - if (by$from_by) { - out$group_by_vars <- character() - } + if (by$from_by) { + out$group_by_vars <- character() + } - out + out + }) } filter.Dataset <- filter.ArrowTabular <- filter.RecordBatchReader <- filter.arrow_dplyr_query diff --git a/r/R/dplyr-funcs-agg.R b/r/R/dplyr-funcs-agg.R index ab1df1d2f15a5..340ebe7adc90f 100644 --- a/r/R/dplyr-funcs-agg.R +++ b/r/R/dplyr-funcs-agg.R @@ -17,7 +17,7 @@ # Aggregation functions # -# These all insert into an ..aggregations list (in a parent frame) a list containing: +# These all insert into an .aggregations list in the mask, a list containing: # @param fun string function name # @param data list of 0 or more Expressions # @param options list of function options, as passed to call_function @@ -29,56 +29,56 @@ # you can use list_compute_functions("^hash_") register_bindings_aggregate <- function() { - register_binding_agg("base::sum", function(..., na.rm = FALSE) { + register_binding("base::sum", function(..., na.rm = FALSE) { set_agg( fun = "sum", data = ensure_one_arg(list2(...), "sum"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("base::prod", function(..., na.rm = FALSE) { + register_binding("base::prod", function(..., na.rm = FALSE) { set_agg( fun = "product", data = ensure_one_arg(list2(...), "prod"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("base::any", function(..., na.rm = FALSE) { + register_binding("base::any", function(..., na.rm = FALSE) { set_agg( fun = "any", 
data = ensure_one_arg(list2(...), "any"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("base::all", function(..., na.rm = FALSE) { + register_binding("base::all", function(..., na.rm = FALSE) { set_agg( fun = "all", data = ensure_one_arg(list2(...), "all"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("base::mean", function(x, na.rm = FALSE) { + register_binding("base::mean", function(x, na.rm = FALSE) { set_agg( fun = "mean", data = list(x), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("stats::sd", function(x, na.rm = FALSE, ddof = 1) { + register_binding("stats::sd", function(x, na.rm = FALSE, ddof = 1) { set_agg( fun = "stddev", data = list(x), options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) }) - register_binding_agg("stats::var", function(x, na.rm = FALSE, ddof = 1) { + register_binding("stats::var", function(x, na.rm = FALSE, ddof = 1) { set_agg( fun = "variance", data = list(x), options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) }) - register_binding_agg( + register_binding( "stats::quantile", function(x, probs, na.rm = FALSE) { if (length(probs) != 1) { @@ -103,7 +103,7 @@ register_bindings_aggregate <- function() { "approximate quantile (t-digest) is computed" ) ) - register_binding_agg( + register_binding( "stats::median", function(x, na.rm = FALSE) { # TODO: Bind to the Arrow function that returns an exact median and remove @@ -122,28 +122,28 @@ register_bindings_aggregate <- function() { }, notes = "approximate median (t-digest) is computed" ) - register_binding_agg("dplyr::n_distinct", function(..., na.rm = FALSE) { + register_binding("dplyr::n_distinct", function(..., na.rm = FALSE) { set_agg( fun = "count_distinct", data = ensure_one_arg(list2(...), "n_distinct"), options = list(na.rm = na.rm) ) }) - register_binding_agg("dplyr::n", function() { + register_binding("dplyr::n", function() { set_agg( fun = "count_all", data = list(), options = list() ) }) - register_binding_agg("base::min", function(..., na.rm = FALSE) { + register_binding("base::min", function(..., na.rm = FALSE) { set_agg( fun = "min", data = ensure_one_arg(list2(...), "min"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("base::max", function(..., na.rm = FALSE) { + register_binding("base::max", function(..., na.rm = FALSE) { set_agg( fun = "max", data = ensure_one_arg(list2(...), "max"), @@ -154,38 +154,38 @@ register_bindings_aggregate <- function() { set_agg <- function(...) { agg_data <- list2(...) - # Find the environment where ..aggregations is stored - target <- find_aggregations_env() - aggs <- get("..aggregations", target) + # Find the environment where .aggregations is stored + target <- find_arrow_mask() + aggs <- get(".aggregations", target) lapply(agg_data[["data"]], function(expr) { - # If any of the fields referenced in the expression are in ..aggregations, + # If any of the fields referenced in the expression are in .aggregations, # then we can't aggregate over them. # This is mainly for combinations of dataset columns and aggregations, # like sum(x - mean(x)), i.e. window functions. # This will reject (sum(sum(x)) as well, but that's not a useful operation. 
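+    # Concretely: once aggs already holds, say, ..temp0 = mean(x), the outer
+    # expression sum(x - mean(x)) reaches this point as sum(x - ..temp0), so
+    # expr$field_names_in_expression() yields c("x", "..temp0") and the
+    # overlap with names(aggs) tested below is what raises the error:
+    #   any(c("x", "..temp0") %in% "..temp0") # TRUE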
if (any(expr$field_names_in_expression() %in% names(aggs))) { - # TODO: support in ARROW-13926 arrow_not_supported("aggregate within aggregate expression") } }) - # Record the (fun, data, options) in ..aggregations + # Record the (fun, data, options) in .aggregations # and return a FieldRef pointing to it tmpname <- paste0("..temp", length(aggs)) aggs[[tmpname]] <- agg_data - assign("..aggregations", aggs, envir = target) + assign(".aggregations", aggs, envir = target) Expression$field_ref(tmpname) } -find_aggregations_env <- function() { - # Find the environment where ..aggregations is stored, +find_arrow_mask <- function() { + # Find the arrow_mask environment by looking for .aggregations, # it's in parent.env of something in the call stack - for (f in sys.frames()) { - if (exists("..aggregations", envir = f)) { - return(f) + n <- 1 + while (TRUE) { + if (exists(".aggregations", envir = caller_env(n))) { + return(caller_env(n)) } + n <- n + 1 } - stop("Could not find ..aggregations") } ensure_one_arg <- function(args, fun) { diff --git a/r/R/dplyr-funcs-conditional.R b/r/R/dplyr-funcs-conditional.R index b9639f00295ce..3ab955aa8aee4 100644 --- a/r/R/dplyr-funcs-conditional.R +++ b/r/R/dplyr-funcs-conditional.R @@ -37,7 +37,7 @@ register_bindings_conditional <- function() { register_binding("dplyr::coalesce", function(...) { args <- list2(...) if (length(args) < 1) { - abort("At least one argument must be supplied to coalesce()") + validation_error("At least one argument must be supplied to coalesce()") } # Treat NaN like NA for consistency with dplyr::coalesce(), but if *all* @@ -102,7 +102,7 @@ register_bindings_conditional <- function() { formulas <- list2(...) n <- length(formulas) if (n == 0) { - abort("No cases provided in case_when()") + validation_error("No cases provided") } query <- vector("list", n) value <- vector("list", n) @@ -110,20 +110,17 @@ register_bindings_conditional <- function() { for (i in seq_len(n)) { f <- formulas[[i]] if (!inherits(f, "formula")) { - abort("Each argument to case_when() must be a two-sided formula") + validation_error("Each argument to case_when() must be a two-sided formula") } query[[i]] <- arrow_eval(f[[2]], mask) value[[i]] <- arrow_eval(f[[3]], mask) if (!call_binding("is.logical", query[[i]])) { - abort("Left side of each formula in case_when() must be a logical expression") - } - if (inherits(value[[i]], "try-error")) { - abort(handle_arrow_not_supported(value[[i]], format_expr(f[[3]]))) + validation_error("Left side of each formula in case_when() must be a logical expression") } } if (!is.null(.default)) { if (length(.default) != 1) { - abort(paste0("`.default` must have size 1, not size ", length(.default), ".")) + validation_error(paste0("`.default` must have size 1, not size ", length(.default), ".")) } query[n + 1] <- TRUE @@ -140,6 +137,5 @@ register_bindings_conditional <- function() { value ) ) - }, notes = "`.ptype` and `.size` arguments not supported" - ) + }, notes = "`.ptype` and `.size` arguments not supported") } diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 440210afd630c..5e6ac4a1035f8 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -121,7 +121,7 @@ register_bindings_datetime_utility <- function() { precision <- "ymdhms" } if (!precision %in% names(ISO8601_precision_map)) { - abort( + validation_error( paste( "`precision` must be one of the following values:", paste(names(ISO8601_precision_map), collapse = ", "), @@ -325,10 +325,10 @@ 
register_bindings_datetime_conversion <- function() { origin = "1970-01-01", tz = "UTC") { if (is.null(format) && length(tryFormats) > 1) { - abort( - paste( - "`as.Date()` with multiple `tryFormats` is not supported in Arrow.", - "Consider using the lubridate specialised parsing functions `ymd()`, `ymd()`, etc." + arrow_not_supported( + "`as.Date()` with multiple `tryFormats`", + body = c( + ">" = "Consider using the lubridate specialised parsing functions `ymd()`, `ymd()`, etc." ) ) } @@ -455,15 +455,13 @@ register_bindings_datetime_timezone <- function() { arrow_not_supported("`roll_dst` must be 1 or 2 items long; other lengths") } - nonexistent <- switch( - roll_dst[1], + nonexistent <- switch(roll_dst[1], "error" = 0L, "boundary" = 2L, arrow_not_supported("`roll_dst` value must be 'error' or 'boundary' for nonexistent times; other values") ) - ambiguous <- switch( - roll_dst[2], + ambiguous <- switch(roll_dst[2], "error" = 0L, "pre" = 1L, "post" = 2L, @@ -651,7 +649,7 @@ register_bindings_duration_helpers <- function() { register_binding( "lubridate::dpicoseconds", function(x = 1) { - abort("Duration in picoseconds not supported in Arrow.") + arrow_not_supported("Duration in picoseconds") }, notes = "not supported" ) diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R index 2042f800142b7..7f0627c33d010 100644 --- a/r/R/dplyr-funcs-doc.R +++ b/r/R/dplyr-funcs-doc.R @@ -36,7 +36,7 @@ #' which returns an `arrow` [Table], or `collect()`, which pulls the resulting #' Table into an R `tibble`. #' -#' * [`anti_join()`][dplyr::anti_join()]: the `copy` and `na_matches` arguments are ignored +#' * [`anti_join()`][dplyr::anti_join()]: the `copy` argument is ignored #' * [`arrange()`][dplyr::arrange()] #' * [`collapse()`][dplyr::collapse()] #' * [`collect()`][dplyr::collect()] @@ -45,22 +45,22 @@ #' * [`distinct()`][dplyr::distinct()]: `.keep_all = TRUE` not supported #' * [`explain()`][dplyr::explain()] #' * [`filter()`][dplyr::filter()] -#' * [`full_join()`][dplyr::full_join()]: the `copy` and `na_matches` arguments are ignored +#' * [`full_join()`][dplyr::full_join()]: the `copy` argument is ignored #' * [`glimpse()`][dplyr::glimpse()] #' * [`group_by()`][dplyr::group_by()] #' * [`group_by_drop_default()`][dplyr::group_by_drop_default()] #' * [`group_vars()`][dplyr::group_vars()] #' * [`groups()`][dplyr::groups()] -#' * [`inner_join()`][dplyr::inner_join()]: the `copy` and `na_matches` arguments are ignored -#' * [`left_join()`][dplyr::left_join()]: the `copy` and `na_matches` arguments are ignored -#' * [`mutate()`][dplyr::mutate()]: window functions (e.g. things that require aggregation within groups) not currently supported +#' * [`inner_join()`][dplyr::inner_join()]: the `copy` argument is ignored +#' * [`left_join()`][dplyr::left_join()]: the `copy` argument is ignored +#' * [`mutate()`][dplyr::mutate()] #' * [`pull()`][dplyr::pull()]: the `name` argument is not supported; returns an R vector by default but this behavior is deprecated and will return an Arrow [ChunkedArray] in a future release. Provide `as_vector = TRUE/FALSE` to control this behavior, or set `options(arrow.pull_as_vector)` globally. 
#' * [`relocate()`][dplyr::relocate()] #' * [`rename()`][dplyr::rename()] #' * [`rename_with()`][dplyr::rename_with()] -#' * [`right_join()`][dplyr::right_join()]: the `copy` and `na_matches` arguments are ignored +#' * [`right_join()`][dplyr::right_join()]: the `copy` argument is ignored #' * [`select()`][dplyr::select()] -#' * [`semi_join()`][dplyr::semi_join()]: the `copy` and `na_matches` arguments are ignored +#' * [`semi_join()`][dplyr::semi_join()]: the `copy` argument is ignored #' * [`show_query()`][dplyr::show_query()] #' * [`slice_head()`][dplyr::slice_head()]: slicing within groups not supported; Arrow datasets do not have row order, so head is non-deterministic; `prop` only supported on queries where `nrow()` is knowable without evaluating #' * [`slice_max()`][dplyr::slice_max()]: slicing within groups not supported; `with_ties = TRUE` (dplyr default) is not supported; `prop` only supported on queries where `nrow()` is knowable without evaluating diff --git a/r/R/dplyr-funcs-simple.R b/r/R/dplyr-funcs-simple.R index 308a46601a6db..4ccc2498435b3 100644 --- a/r/R/dplyr-funcs-simple.R +++ b/r/R/dplyr-funcs-simple.R @@ -177,7 +177,7 @@ common_type <- function(exprs) { # * pmin/pmax return(first_type) } - stop("There is no common type in these expressions") + validation_error("There is no common type in these expressions") } cast_or_parse <- function(x, type) { diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R index a21ce78edd189..77e1a5405a692 100644 --- a/r/R/dplyr-funcs-string.R +++ b/r/R/dplyr-funcs-string.R @@ -134,9 +134,9 @@ format_string_replacement <- function(replacement, ignore.case, fixed) { # Arrow locale will be supported with ARROW-14126 stop_if_locale_provided <- function(locale) { if (!identical(locale, "en")) { - stop("Providing a value for 'locale' other than the default ('en') is not supported in Arrow. ", - "To change locale, use 'Sys.setlocale()'", - call. 
= FALSE + arrow_not_supported( + "Providing a value for 'locale' other than the default ('en')", + body = c(">" = "To change locale, use 'Sys.setlocale()'") ) } } @@ -158,10 +158,11 @@ register_bindings_string_join <- function() { # handle scalar literal args, and cast all args to string for # consistency with base::paste(), base::paste0(), and stringr::str_c() if (!inherits(arg, "Expression")) { - assert_that( - length(arg) == 1, - msg = "Literal vectors of length != 1 not supported in string concatenation" - ) + if (length(arg) != 1) { + arrow_not_supported( + "Literal vectors of length != 1 in string concatenation" + ) + } Expression$scalar(as.character(arg)) } else { call_binding("as.character", arg) @@ -181,12 +182,11 @@ register_bindings_string_join <- function() { register_binding( "base::paste", function(..., sep = " ", collapse = NULL, recycle0 = FALSE) { - assert_that( - is.null(collapse), - msg = "paste() with the collapse argument is not yet supported in Arrow" - ) - if (!inherits(sep, "Expression")) { - assert_that(!is.na(sep), msg = "Invalid separator") + if (!is.null(collapse)) { + arrow_not_supported("`collapse` argument") + } + if (!inherits(sep, "Expression") && is.na(sep)) { + validation_error("Invalid separator") } arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep) }, @@ -196,10 +196,9 @@ register_bindings_string_join <- function() { register_binding( "base::paste0", function(..., collapse = NULL, recycle0 = FALSE) { - assert_that( - is.null(collapse), - msg = "paste0() with the collapse argument is not yet supported in Arrow" - ) + if (!is.null(collapse)) { + arrow_not_supported("`collapse` argument") + } arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "") }, notes = "the `collapse` argument is not yet supported" @@ -208,12 +207,11 @@ register_bindings_string_join <- function() { register_binding( "stringr::str_c", function(..., sep = "", collapse = NULL) { - assert_that( - is.null(collapse), - msg = "str_c() with the collapse argument is not yet supported in Arrow" - ) - if (!inherits(sep, "Expression")) { - assert_that(!is.na(sep), msg = "`sep` must be a single string, not `NA`.") + if (!is.null(collapse)) { + arrow_not_supported("`collapse` argument") + } + if (!inherits(sep, "Expression") && is.na(sep)) { + validation_error("`sep` must be a single string, not `NA`.") } arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep) }, @@ -352,10 +350,10 @@ register_bindings_string_regex <- function() { arrow_r_string_replace_function <- function(max_replacements) { function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) { if (length(pattern) != 1) { - stop("`pattern` must be a length 1 character vector") + validation_error("`pattern` must be a length 1 character vector") } if (length(replacement) != 1) { - stop("`replacement` must be a length 1 character vector") + validation_error("`replacement` must be a length 1 character vector") } Expression$create( ifelse(fixed && !ignore.case, "replace_substring", "replace_substring_regex"), @@ -512,14 +510,12 @@ register_bindings_string_other <- function() { register_binding( "base::substr", function(x, start, stop) { - assert_that( - length(start) == 1, - msg = "`start` must be length 1 - other lengths are not supported in Arrow" - ) - assert_that( - length(stop) == 1, - msg = "`stop` must be length 1 - other lengths are not supported in Arrow" - ) + if (length(start) != 1) { + arrow_not_supported("`start` must be length 1 - other lengths") + } + if (length(stop) 
!= 1) { + arrow_not_supported("`stop` must be length 1 - other lengths") + } # substr treats values as if they're on a continuous number line, so values # 0 are effectively blank characters - set `start` to 1 here so Arrow mimics @@ -561,14 +557,12 @@ register_bindings_string_other <- function() { }) register_binding("stringr::str_sub", function(string, start = 1L, end = -1L) { - assert_that( - length(start) == 1, - msg = "`start` must be length 1 - other lengths are not supported in Arrow" - ) - assert_that( - length(end) == 1, - msg = "`end` must be length 1 - other lengths are not supported in Arrow" - ) + if (length(start) != 1) { + arrow_not_supported("`start` must be length 1 - other lengths") + } + if (length(end) != 1) { + arrow_not_supported("`end` must be length 1 - other lengths") + } # In stringr::str_sub, an `end` value of -1 means the end of the string, so # set it to the maximum integer to match this behavior diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R index f244682737cb4..85c26ec05c8ba 100644 --- a/r/R/dplyr-funcs-type.R +++ b/r/R/dplyr-funcs-type.R @@ -105,7 +105,7 @@ register_bindings_type_cast <- function() { } else if (inherits(class2, "DataType")) { object$type() == as_type(class2) } else { - stop("Second argument to is() is not a string or DataType", call. = FALSE) + validation_error("Second argument to is() is not a string or DataType") } }) @@ -140,7 +140,7 @@ register_bindings_type_cast <- function() { fix.empty.names = TRUE, stringsAsFactors = FALSE) { # we need a specific value of stringsAsFactors because the default was - # TRUE in R <= 3.6 + # TRUE in R <= 3.6 and folks might still be cargoculting to stay in the past. if (!identical(stringsAsFactors, FALSE)) { arrow_not_supported("stringsAsFactors = TRUE") } @@ -219,7 +219,10 @@ register_bindings_type_inspect <- function() { call_binding("is.character", x) }) register_binding("rlang::is_double", function(x, n = NULL, finite = NULL) { - assert_that(is.null(n) && is.null(finite)) + assert_that(is.null(n)) + if (!is.null(finite)) { + arrow_not_supported("`finite` argument") + } call_binding("is.double", x) }) register_binding("rlang::is_integer", function(x, n = NULL) { diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index abf2362d0107f..c0eb47e428b7f 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -22,8 +22,8 @@ NULL #' Register compute bindings #' -#' The `register_binding()` and `register_binding_agg()` functions -#' are used to populate a list of functions that operate on (and return) +#' `register_binding()` is used to populate a list of functions that operate on +#' (and return) #' Expressions. These are the basis for the `.data` mask inside dplyr methods. #' #' @section Writing bindings: @@ -40,26 +40,10 @@ NULL #' * Inside your function, you can call any other binding with `call_binding()`. #' #' @param fun_name A string containing a function name in the form `"function"` or -#' `"package::function"`. The package name is currently not used but -#' may be used in the future to allow these types of function calls. -#' @param fun A function or `NULL` to un-register a previous function. +#' `"package::function"`. +#' @param fun A function, or `NULL` to un-register a previous function. #' This function must accept `Expression` objects as arguments and return #' `Expression` objects instead of regular R objects. -#' @param agg_fun An aggregate function or `NULL` to un-register a previous -#' aggregate function. 
This function must accept `Expression` objects as -#' arguments and return a `list()` with components: -#' - `fun`: string function name -#' - `data`: list of 0 or more `Expression`s -#' - `options`: list of function options, as passed to call_function -#' @param update_cache Update .cache$functions at the time of registration. -#' the default is FALSE because the majority of usage is to register -#' bindings at package load, after which we create the cache once. The -#' reason why .cache$functions is needed in addition to nse_funcs for -#' non-aggregate functions could be revisited...it is currently used -#' as the data mask in mutate, filter, and aggregate (but not -#' summarise) because the data mask has to be a list. -#' @param registry An environment in which the functions should be -#' assigned. #' @param notes string for the docs: note any limitations or differences in #' behavior between the Arrow version and the R function. #' @return The previously registered binding or `NULL` if no previously @@ -67,12 +51,10 @@ NULL #' @keywords internal register_binding <- function(fun_name, fun, - registry = nse_funcs, - update_cache = FALSE, notes = character(0)) { unqualified_name <- sub("^.*?:{+}", "", fun_name) - previous_fun <- registry[[unqualified_name]] + previous_fun <- .cache$functions[[unqualified_name]] # if the unqualified name exists in the registry, warn if (!is.null(previous_fun) && !identical(fun, previous_fun)) { @@ -87,58 +69,25 @@ register_binding <- function(fun_name, # register both as `pkg::fun` and as `fun` if `qualified_name` is prefixed # unqualified_name and fun_name will be the same if not prefixed - registry[[unqualified_name]] <- fun - registry[[fun_name]] <- fun - + .cache$functions[[unqualified_name]] <- fun + .cache$functions[[fun_name]] <- fun .cache$docs[[fun_name]] <- notes - - if (update_cache) { - fun_cache <- .cache$functions - fun_cache[[unqualified_name]] <- fun - fun_cache[[fun_name]] <- fun - .cache$functions <- fun_cache - } - invisible(previous_fun) } -unregister_binding <- function(fun_name, registry = nse_funcs, - update_cache = FALSE) { +unregister_binding <- function(fun_name) { unqualified_name <- sub("^.*?:{+}", "", fun_name) - previous_fun <- registry[[unqualified_name]] + previous_fun <- .cache$functions[[unqualified_name]] - rm( - list = unique(c(fun_name, unqualified_name)), - envir = registry, - inherits = FALSE - ) - - if (update_cache) { - fun_cache <- .cache$functions - fun_cache[[unqualified_name]] <- NULL - fun_cache[[fun_name]] <- NULL - .cache$functions <- fun_cache - } + .cache$functions[[unqualified_name]] <- NULL + .cache$functions[[fun_name]] <- NULL invisible(previous_fun) } -#' @rdname register_binding -#' @keywords internal -register_binding_agg <- function(fun_name, - agg_fun, - registry = agg_funcs, - notes = character(0)) { - register_binding(fun_name, agg_fun, registry = registry, notes = notes) -} - # Supports functions and tests that call previously-defined bindings call_binding <- function(fun_name, ...) { - nse_funcs[[fun_name]](...) -} - -call_binding_agg <- function(fun_name, ...) { - agg_funcs[[fun_name]](...) + .cache$functions[[fun_name]](...) } create_binding_cache <- function() { @@ -147,7 +96,7 @@ create_binding_cache <- function() { # Register all available Arrow Compute functions, namespaced as arrow_fun. all_arrow_funs <- list_compute_functions() - arrow_funcs <- set_names( + .cache$functions <- set_names( lapply(all_arrow_funs, function(fun) { force(fun) function(...) Expression$create(fun, ...) 
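
# A simplified sketch of the single-registry pattern this file moves to: one
# environment maps both the bare and the namespace-qualified spelling of a
# function to the same binding (toy names below; the real registry is
# .cache$functions):
registry <- new.env(parent = emptyenv())
register_sketch <- function(fun_name, fun) {
  unqualified <- sub("^.*?::", "", fun_name)
  registry[[unqualified]] <- fun # callable as nchar(x)
  registry[[fun_name]] <- fun # and as base::nchar(x)
}
register_sketch("base::nchar", function(x) paste0("utf8_length(", x, ")"))
registry[["nchar"]]("x") # both spellings resolve to the same binding
registry[["base::nchar"]]("x")
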
@@ -155,7 +104,7 @@ create_binding_cache <- function() { paste0("arrow_", all_arrow_funs) ) - # Register bindings into nse_funcs and agg_funcs + # Register bindings into the cache register_bindings_array_function_map() register_bindings_aggregate() register_bindings_conditional() @@ -165,37 +114,17 @@ create_binding_cache <- function() { register_bindings_type() register_bindings_augmented() - # We only create the cache for nse_funcs and not agg_funcs - .cache$functions <- c(as.list(nse_funcs), arrow_funcs) -} - -# environments in the arrow namespace used in the above functions -nse_funcs <- new.env(parent = emptyenv()) -agg_funcs <- new.env(parent = emptyenv()) -.cache <- new.env(parent = emptyenv()) - -# we register 2 versions of the "::" binding - one for use with nse_funcs -# and another one for use with agg_funcs (registered in dplyr-funcs-agg.R) -nse_funcs[["::"]] <- function(lhs, rhs) { - lhs_name <- as.character(substitute(lhs)) - rhs_name <- as.character(substitute(rhs)) + .cache$functions[["::"]] <- function(lhs, rhs) { + lhs_name <- as.character(substitute(lhs)) + rhs_name <- as.character(substitute(rhs)) - fun_name <- paste0(lhs_name, "::", rhs_name) + fun_name <- paste0(lhs_name, "::", rhs_name) - # if we do not have a binding for pkg::fun, then fall back on to the - # regular pkg::fun function - nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] + # if we do not have a binding for pkg::fun, then fall back on to the + # regular pkg::fun function + .cache$functions[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] + } } -agg_funcs[["::"]] <- function(lhs, rhs) { - lhs_name <- as.character(substitute(lhs)) - rhs_name <- as.character(substitute(rhs)) - - fun_name <- paste0(lhs_name, "::", rhs_name) - - # if we do not have a binding for pkg::fun, then fall back on to the - # nse_funcs (useful when we have a regular function inside an aggregating one) - # and then, if searching nse_funcs fails too, fall back to the - # regular `pkg::fun()` function - agg_funcs[[fun_name]] %||% nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] -} +# environment in the arrow namespace used in the above functions +.cache <- new.env(parent = emptyenv()) diff --git a/r/R/dplyr-join.R b/r/R/dplyr-join.R index 39237f574bd28..e76e041a54277 100644 --- a/r/R/dplyr-join.R +++ b/r/R/dplyr-join.R @@ -25,14 +25,15 @@ do_join <- function(x, suffix = c(".x", ".y"), ..., keep = FALSE, - na_matches, + na_matches = c("na", "never"), join_type) { # TODO: handle `copy` arg: ignore? - # TODO: handle `na_matches` arg x <- as_adq(x) y <- as_adq(y) by <- handle_join_by(by, x, y) + na_matches <- match.arg(na_matches) + # For outer joins, we need to output the join keys on both sides so we # can coalesce them afterwards. 
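+  # For example, full_join(a, b, by = "k") with keep = FALSE returns both
+  # sides' "k" from the join node, and the result then exposes a single key
+  # equivalent to coalesce(k.x, k.y); the same effect in plain dplyr:
+  #   full_join(a, b, by = "k", keep = TRUE) %>%
+  #     mutate(k = coalesce(k.x, k.y))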
left_output <- if (!keep && join_type == "RIGHT_OUTER") { @@ -54,7 +55,8 @@ do_join <- function(x, left_output = left_output, right_output = right_output, suffix = suffix, - keep = keep + keep = keep, + na_matches = na_matches == "na" ) collapse.arrow_dplyr_query(x) } diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index 287532dee08a9..fcb1cedbbb168 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -24,105 +24,116 @@ mutate.arrow_dplyr_query <- function(.data, .keep = c("all", "used", "unused", "none"), .before = NULL, .after = NULL) { - call <- match.call() - out <- as_adq(.data) + try_arrow_dplyr({ + out <- as_adq(.data) - by <- compute_by({{ .by }}, out, by_arg = ".by", data_arg = ".data") + by <- compute_by({{ .by }}, out, by_arg = ".by", data_arg = ".data") - if (by$from_by) { - out$group_by_vars <- by$names - } - grv <- out$group_by_vars - expression_list <- expand_across(out, quos(...), exclude_cols = grv) - exprs <- ensure_named_exprs(expression_list) + if (by$from_by) { + out$group_by_vars <- by$names + } + grv <- out$group_by_vars + expression_list <- expand_across(out, quos(...), exclude_cols = grv) + exprs <- ensure_named_exprs(expression_list) - .keep <- match.arg(.keep) - .before <- enquo(.before) - .after <- enquo(.after) + .keep <- match.arg(.keep) + .before <- enquo(.before) + .after <- enquo(.after) - if (.keep %in% c("all", "unused") && length(exprs) == 0) { - # Nothing to do - return(out) - } + if (.keep %in% c("all", "unused") && length(exprs) == 0) { + # Nothing to do + return(out) + } - # Restrict the cases we support for now - has_aggregations <- any(unlist(lapply(exprs, all_funs)) %in% names(agg_funcs)) - if (has_aggregations) { - # ARROW-13926 - # mutate() on a grouped dataset does calculations within groups - # This doesn't matter on scalar ops (arithmetic etc.) but it does - # for things with aggregations (e.g. 
subtracting the mean)
-    return(abandon_ship(call, .data, "window functions not currently supported in Arrow"))
-  }
+    # Create a mask with aggregation functions in it
+    # If there are any aggregations, we will need to compute them and
+    # join the results back in, for "window functions" like x - mean(x)
+    mask <- arrow_mask(out)
+    # Evaluate the mutate expressions
+    results <- list()
+    for (i in seq_along(exprs)) {
+      # Iterate over the indices and not the names because names may be repeated
+      # (which overwrites the previous name)
+      new_var <- names(exprs)[i]
+      results[[new_var]] <- arrow_eval(exprs[[i]], mask)
+      if (!inherits(results[[new_var]], "Expression") &&
+        !is.null(results[[new_var]])) {
+        # We need some wrapping to handle literal values
+        if (length(results[[new_var]]) != 1) {
+          arrow_not_supported("Recycling values of length != 1", call = exprs[[i]])
+        }
+        results[[new_var]] <- Expression$scalar(results[[new_var]])
+      }
+      # Put it in the data mask too
+      mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]]
+    }
-  mask <- arrow_mask(out)
-  results <- list()
-  for (i in seq_along(exprs)) {
-    # Iterate over the indices and not the names because names may be repeated
-    # (which overwrites the previous name)
-    new_var <- names(exprs)[i]
-    results[[new_var]] <- arrow_eval(exprs[[i]], mask)
-    if (inherits(results[[new_var]], "try-error")) {
-      msg <- handle_arrow_not_supported(
-        results[[new_var]],
-        format_expr(exprs[[i]])
-      )
-      return(abandon_ship(call, .data, msg))
-    } else if (!inherits(results[[new_var]], "Expression") &&
-      !is.null(results[[new_var]])) {
-      # We need some wrapping to handle literal values
-      if (length(results[[new_var]]) != 1) {
-        msg <- paste0("In ", new_var, " = ", format_expr(exprs[[i]]), ", only values of size one are recycled")
-        return(abandon_ship(call, .data, msg))
+    if (length(mask$.aggregations)) {
+      # Make a copy of .data, do the aggregations on it, and then left_join on
+      # the group_by variables.
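+      # A plain-dplyr sketch of this rewrite (toy data; ..temp0 stands in
+      # for a generated aggregate name):
+      #   df <- tibble::tibble(g = c("a", "a", "b"), x = c(1, 3, 5))
+      #   agg <- df %>% group_by(g) %>% summarise(..temp0 = mean(x))
+      #   df %>% left_join(agg, by = "g") %>% mutate(centered = x - ..temp0)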
+ agg_query <- as_adq(.data) + # These may be computed by .by, make sure they're set + agg_query$group_by_vars <- grv + agg_query$aggregations <- mask$.aggregations + agg_query <- collapse.arrow_dplyr_query(agg_query) + if (length(grv)) { + out <- left_join(out, agg_query, by = grv) + } else { + # If there are no group_by vars, add a scalar column to both and join on that + agg_query$selected_columns[["..tempjoin"]] <- Expression$scalar(1L) + out$selected_columns[["..tempjoin"]] <- Expression$scalar(1L) + out <- left_join(out, agg_query, by = "..tempjoin") } - results[[new_var]] <- Expression$scalar(results[[new_var]]) } - # Put it in the data mask too - mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]] - } - old_vars <- names(out$selected_columns) - # Note that this is names(exprs) not names(results): - # if results$new_var is NULL, that means we are supposed to remove it - new_vars <- names(exprs) + old_vars <- names(out$selected_columns) + # Note that this is names(exprs) not names(results): + # if results$new_var is NULL, that means we are supposed to remove it + new_vars <- names(exprs) - # Assign the new columns into the out$selected_columns - for (new_var in new_vars) { - out$selected_columns[[new_var]] <- results[[new_var]] - } + # Assign the new columns into the out$selected_columns + for (new_var in new_vars) { + out$selected_columns[[new_var]] <- results[[new_var]] + } - # Deduplicate new_vars and remove NULL columns from new_vars - new_vars <- intersect(union(new_vars, grv), names(out$selected_columns)) + # Prune any ..temp columns from the result, which would have come from + # .aggregations + temps <- grepl("^\\.\\.temp", names(out$selected_columns)) + out$selected_columns <- out$selected_columns[!temps] - # Respect .before and .after - if (!quo_is_null(.before) || !quo_is_null(.after)) { - new <- setdiff(new_vars, old_vars) - out <- dplyr::relocate(out, all_of(new), .before = !!.before, .after = !!.after) - } + # Deduplicate new_vars and remove NULL columns from new_vars + new_vars <- intersect(union(new_vars, grv), names(out$selected_columns)) - # Respect .keep - if (.keep == "none") { - ## for consistency with dplyr, this appends new columns after existing columns - ## by specifying the order - new_cols_last <- c(intersect(old_vars, new_vars), setdiff(new_vars, old_vars)) - out$selected_columns <- out$selected_columns[new_cols_last] - } else if (.keep != "all") { - # "used" or "unused" - used_vars <- unlist(lapply(exprs, all.vars), use.names = FALSE) - if (.keep == "used") { - out$selected_columns[setdiff(old_vars, used_vars)] <- NULL - } else { - # "unused" - out$selected_columns[intersect(old_vars, used_vars)] <- NULL + # Respect .before and .after + if (!quo_is_null(.before) || !quo_is_null(.after)) { + new <- setdiff(new_vars, old_vars) + out <- dplyr::relocate(out, all_of(new), .before = !!.before, .after = !!.after) } - } - if (by$from_by) { - out$group_by_vars <- character() - } + # Respect .keep + if (.keep == "none") { + ## for consistency with dplyr, this appends new columns after existing columns + ## by specifying the order + new_cols_last <- c(intersect(old_vars, new_vars), setdiff(new_vars, old_vars)) + out$selected_columns <- out$selected_columns[new_cols_last] + } else if (.keep != "all") { + # "used" or "unused" + used_vars <- unlist(lapply(exprs, all.vars), use.names = FALSE) + if (.keep == "used") { + out$selected_columns[setdiff(old_vars, used_vars)] <- NULL + } else { + # "unused" + out$selected_columns[intersect(old_vars, used_vars)] <- 
NULL + } + } + + if (by$from_by) { + out$group_by_vars <- character() + } - # Even if "none", we still keep group vars - ensure_group_vars(out) + # Even if "none", we still keep group vars + ensure_group_vars(out) + }) } mutate.Dataset <- mutate.ArrowTabular <- mutate.RecordBatchReader <- mutate.arrow_dplyr_query diff --git a/r/R/dplyr-slice.R b/r/R/dplyr-slice.R index bcb6547f7c8e9..2173d897f1f9d 100644 --- a/r/R/dplyr-slice.R +++ b/r/R/dplyr-slice.R @@ -148,7 +148,7 @@ prop_to_n <- function(.data, prop) { validate_prop <- function(prop) { if (!is.numeric(prop) || length(prop) != 1 || is.na(prop) || prop < 0 || prop > 1) { - stop("`prop` must be a single numeric value between 0 and 1", call. = FALSE) + validation_error("`prop` must be a single numeric value between 0 and 1") } } diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 5bb81dc2b34fc..f4fda0f13aabd 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -18,39 +18,18 @@ # The following S3 methods are registered on load if dplyr is present summarise.arrow_dplyr_query <- function(.data, ..., .by = NULL, .groups = NULL) { - call <- match.call() - out <- as_adq(.data) + try_arrow_dplyr({ + out <- as_adq(.data) - by <- compute_by({{ .by }}, out, by_arg = ".by", data_arg = ".data") - - if (by$from_by) { - out$group_by_vars <- by$names - .groups <- "drop" - } - - exprs <- expand_across(out, quos(...), exclude_cols = out$group_by_vars) - - # Only retain the columns we need to do our aggregations - vars_to_keep <- unique(c( - unlist(lapply(exprs, all.vars)), # vars referenced in summarise - dplyr::group_vars(out) # vars needed for grouping - )) - # If exprs rely on the results of previous exprs - # (total = sum(x), mean = total / n()) - # then not all vars will correspond to columns in the data, - # so don't try to select() them (use intersect() to exclude them) - # Note that this select() isn't useful for the Arrow summarize implementation - # because it will effectively project to keep what it needs anyway, - # but the data.frame fallback version does benefit from select here - out <- dplyr::select(out, intersect(vars_to_keep, names(out))) - - # Try stuff, if successful return() - out <- try(do_arrow_summarize(out, !!!exprs, .groups = .groups), silent = TRUE) - if (inherits(out, "try-error")) { - out <- abandon_ship(call, .data, format(out)) - } + by <- compute_by({{ .by }}, out, by_arg = ".by", data_arg = ".data") + if (by$from_by) { + out$group_by_vars <- by$names + .groups <- "drop" + } - out + exprs <- expand_across(out, quos(...), exclude_cols = out$group_by_vars) + do_arrow_summarize(out, !!!exprs, .groups = .groups) + }) } summarise.Dataset <- summarise.ArrowTabular <- summarise.RecordBatchReader <- summarise.arrow_dplyr_query @@ -80,34 +59,32 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) { # ExecNode), and in the expressions, replace them with FieldRefs so that # further operations can happen (in what will become a ProjectNode that works # on the result of the Aggregate). - # To do this, we create a list in this function scope, and in arrow_mask(), - # and we make sure this environment here is the parent env of the binding - # functions, so that when they receive an expression, they can pull out - # aggregations and insert them into the list, which they can find because it - # is in the parent env. + # To do this, arrow_mask() includes a list called .aggregations, + # and the aggregation functions will pull out those terms and insert into + # that list. 
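Note on the rewrite strategy in the mutate() hunk above: a mutate() expression that embeds an aggregation is now split into a separate aggregation query whose result is left_join()ed back onto the original data on the group keys. A minimal user-level sketch of the same idea (the tab, agg, and x_mean names are illustrative; the real implementation works on Expressions and uses internal ..temp columns as shown above):

library(dplyr)
library(arrow)

tab <- arrow_table(g = c(1, 1, 2), x = c(1, 2, 3))

# Roughly what mutate(centered = x - mean(x)) after group_by(g) now does:
agg <- tab %>%
  group_by(g) %>%
  summarize(x_mean = mean(x))          # compute the aggregation as its own query

tab %>%
  left_join(agg, by = "g") %>%         # join the result back on the group keys
  mutate(centered = x - x_mean) %>%    # the remaining projection is elementwise
  select(-x_mean) %>%                  # prune the helper column, like the ..temp pruning
  collect()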
# nolint end - ..aggregations <- empty_named_list() - - # We'll collect any transformations after the aggregation here - ..post_mutate <- empty_named_list() - mask <- arrow_mask(.data, aggregation = TRUE) + mask <- arrow_mask(.data) + # We'll collect any transformations after the aggregation here. + # summarize_eval() returns NULL when the outer expression is an aggregation, + # i.e. there is no projection to do after + post_mutate <- empty_named_list() for (i in seq_along(exprs)) { # Iterate over the indices and not the names because names may be repeated # (which overwrites the previous name) name <- names(exprs)[i] - ..post_mutate[[name]] <- summarize_eval(name, exprs[[i]], mask) + post_mutate[[name]] <- summarize_eval(name, exprs[[i]], mask) } # Apply the results to the .data object. # First, the aggregations - .data$aggregations <- ..aggregations + .data$aggregations <- mask$.aggregations # Then collapse the query so that the resulting query object can have # additional operations applied to it out <- collapse.arrow_dplyr_query(.data) - # Now, add the projections in ..post_mutate (if any) - for (post in names(..post_mutate)) { + # Now, add the projections in post_mutate (if any) + for (post in names(post_mutate)) { # One last check: it's possible that an expression like y - mean(y) would # successfully evaluate, but it's not supported. It gets transformed to: # nolint start @@ -121,15 +98,14 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) { # We can tell the expression is invalid if it references fields not in # the schema of the data after summarize(). Evaluating its type will # throw an error if it's invalid. - tryCatch(..post_mutate[[post]]$type(out$.data$schema), error = function(e) { - msg <- paste( - "Expression", as_label(exprs[[post]]), - "is not a valid aggregation expression or is" + tryCatch(post_mutate[[post]]$type(out$.data$schema), error = function(e) { + arrow_not_supported( + "Expression is not a valid aggregation expression or is", + call = exprs[[post]] ) - arrow_not_supported(msg) }) # If it's valid, add it to the .data object - out$selected_columns[[post]] <- ..post_mutate[[post]] + out$selected_columns[[post]] <- post_mutate[[post]] } # Make sure column order is correct (and also drop ..temp columns) @@ -168,12 +144,18 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) { } else if (.groups == "keep") { out$group_by_vars <- .data$group_by_vars } else if (.groups == "rowwise") { - stop(arrow_not_supported('.groups = "rowwise"')) + arrow_not_supported( + '.groups = "rowwise"', + call = rlang::caller_call() + ) } else if (.groups == "drop") { # collapse() preserves groups so remove them out <- dplyr::ungroup(out) } else { - stop(paste("Invalid .groups argument:", .groups)) + validation_error( + paste("Invalid .groups argument:", .groups), + call = rlang::caller_call() + ) } out$drop_empty_groups <- .data$drop_empty_groups if (getOption("arrow.summarise.sort", FALSE)) { @@ -185,16 +167,6 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) { out } -arrow_eval_or_stop <- function(expr, mask) { - # TODO: change arrow_eval error handling behavior? - out <- arrow_eval(expr, mask) - if (inherits(out, "try-error")) { - msg <- handle_arrow_not_supported(out, format_expr(expr)) - stop(msg, call. = FALSE) - } - out -} - # This function returns a list of expressions which is used to project the data # before an aggregation. This list includes the fields used in the aggregation # expressions (the "targets") and the group fields.
The names of the returned @@ -266,14 +238,14 @@ format_aggregation <- function(x) { # This function evaluates an expression and returns the post-summarize # projection that results, or NULL if there is none because the top-level # expression was an aggregation. Any aggregations are pulled out and collected -# in the ..aggregations list outside this function. +# in the .aggregations list outside this function. summarize_eval <- function(name, quosure, mask) { # Add previous aggregations to the mask, so they can be referenced - for (n in names(get("..aggregations", parent.frame()))) { + for (n in names(mask$.aggregations)) { mask[[n]] <- mask$.data[[n]] <- Expression$field_ref(n) } # Evaluate: - value <- arrow_eval_or_stop(quosure, mask) + value <- arrow_eval(quosure, mask) # Handle the result. There are a few different cases. if (!inherits(value, "Expression")) { @@ -286,14 +258,11 @@ summarize_eval <- function(name, quosure, mask) { # Handle case where outer expr is ..temp field ref. This came from an # aggregation at the top level. So the resulting name should be `name`. # not `..tempN`. Rename the corresponding aggregation. - post_aggs <- get("..aggregations", parent.frame()) result_field_name <- value$field_name - if (result_field_name %in% names(post_aggs)) { + if (result_field_name %in% names(mask$.aggregations)) { # Do this by assigning over `name` in case something else was in `name` - post_aggs[[name]] <- post_aggs[[result_field_name]] - post_aggs[[result_field_name]] <- NULL - # Assign back into the parent environment - assign("..aggregations", post_aggs, parent.frame()) + mask$.aggregations[[name]] <- mask$.aggregations[[result_field_name]] + mask$.aggregations[[result_field_name]] <- NULL # Return NULL because there is no post-mutate projection, it's just # the aggregation return(NULL) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index f11b88d301ef9..93fcfdef28f28 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -338,22 +338,6 @@ ensure_arrange_vars <- function(x) { x } -# Helper to handle unsupported dplyr features -# * For Table/RecordBatch, we collect() and then call the dplyr method in R -# * For Dataset, we just error -abandon_ship <- function(call, .data, msg) { - msg <- trimws(msg) - dplyr_fun_name <- sub("^(.*?)\\..*", "\\1", as.character(call[[1]])) - if (query_on_dataset(.data)) { - stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE) - } - # else, collect and call dplyr method - warning(msg, "; pulling data into R", immediate. = TRUE, call. 
= FALSE) - call$.data <- dplyr::collect(.data) - call[[1]] <- get(dplyr_fun_name, envir = asNamespace("dplyr")) - eval.parent(call, 2) -} - query_on_dataset <- function(x) { any(map_lgl(all_sources(x), ~ inherits(., c("Dataset", "RecordBatchReader")))) } diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 0f8a84f9b867e..fb48d790fd36e 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -148,7 +148,8 @@ ExecPlan <- R6Class("ExecPlan", left_output = .data$join$left_output, right_output = .data$join$right_output, left_suffix = .data$join$suffix[[1]], - right_suffix = .data$join$suffix[[2]] + right_suffix = .data$join$suffix[[2]], + na_matches = .data$join$na_matches ) } @@ -307,7 +308,7 @@ ExecNode <- R6Class("ExecNode", out$extras$source_schema$metadata[["r"]]$attributes <- NULL out }, - Join = function(type, right_node, by, left_output, right_output, left_suffix, right_suffix) { + Join = function(type, right_node, by, left_output, right_output, left_suffix, right_suffix, na_matches = TRUE) { self$preserve_extras( ExecNode_Join( self, @@ -318,7 +319,8 @@ ExecNode <- R6Class("ExecNode", left_output = left_output, right_output = right_output, output_suffix_for_left = left_suffix, - output_suffix_for_right = right_suffix + output_suffix_for_right = right_suffix, + na_matches = na_matches ) ) }, diff --git a/r/R/udf.R b/r/R/udf.R index 922095cceba6a..0415fbac3c9fc 100644 --- a/r/R/udf.R +++ b/r/R/udf.R @@ -95,12 +95,7 @@ register_scalar_function <- function(name, fun, in_type, out_type, body(binding_fun) <- expr_substitute(body(binding_fun), sym("name"), name) environment(binding_fun) <- asNamespace("arrow") - register_binding( - name, - binding_fun, - update_cache = TRUE - ) - + register_binding(name, binding_fun) invisible(NULL) } diff --git a/r/R/util.R b/r/R/util.R index a7cb5b3792d29..14e4544ab1e54 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -15,20 +15,6 @@ # specific language governing permissions and limitations # under the License. -# for compatibility with R versions earlier than 4.0.0 -if (!exists("deparse1")) { - deparse1 <- function(expr, collapse = " ", width.cutoff = 500L, ...) { - paste(deparse(expr, width.cutoff, ...), collapse = collapse) - } -} - -# for compatibility with R versions earlier than 3.6.0 -if (!exists("str2lang")) { - str2lang <- function(s) { - parse(text = s, keep.source = FALSE)[[1]] - } -} - oxford_paste <- function(x, conjunction = "and", quote = TRUE, diff --git a/r/README.md b/r/README.md index 710fa8e8d7cb5..c3cd5a32eaf69 100644 --- a/r/README.md +++ b/r/README.md @@ -12,7 +12,7 @@ The R `{arrow}` package provides access to many of the features of the [Apache Arrow C++ library](https://arrow.apache.org/docs/cpp/index.html) for R users. The goal of arrow is to provide an Arrow C++ backend to `{dplyr}`, and access to the Arrow C++ library through familiar base R and tidyverse functions, or `{R6}` classes. -To learn more about the Apache Arrow project, see the parent documentation of the [Arrow Project](https://arrow.apache.org/). The Arrow project provides functionality for a wide range of data analysis tasks to store, process and move data fast. See the [read/write article](articles/read_write.html) to learn about reading and writing data files, [data wrangling](articles/data_wrangling.html) to learn how to use dplyr syntax with arrow objects, and the [function documentation](reference/acero.html) for a full list of supported functions within dplyr queries. 
+To learn more about the Apache Arrow project, see the parent documentation of the [Arrow Project](https://arrow.apache.org/). The Arrow project provides functionality for a wide range of data analysis tasks to store, process and move data fast. See the [read/write article](https://arrow.apache.org/docs/r/articles/read_write.html) to learn about reading and writing data files, [data wrangling](https://arrow.apache.org/docs/r/articles/data_wrangling.html) to learn how to use dplyr syntax with arrow objects, and the [function documentation](https://arrow.apache.org/docs/r/reference/acero.html) for a full list of supported functions within dplyr queries. ## Installation @@ -33,11 +33,11 @@ There are some special cases to note: - On macOS, the R you use with Arrow should match the architecture of the machine you are using. If you're using an ARM (aka M1, M2, etc.) processor use R compiled for arm64. If you're using an Intel based mac, use R compiled for x86. Using R and Arrow compiled for Intel based macs on an ARM based mac will result in segfaults and crashes. -- On Linux the installation process can sometimes be more involved because CRAN does not host binaries for Linux. For more information please see the [installation guide](articles/install.html). +- On Linux the installation process can sometimes be more involved because CRAN does not host binaries for Linux. For more information please see the [installation guide](https://arrow.apache.org/docs/r/articles/install.html). - If you are compiling arrow from source, please note that as of version 10.0.0, arrow requires C++17 to build. This has implications on Windows and CentOS 7. For Windows users it means you need to be running an R version of 4.0 or later. On CentOS 7, it means you need to install a newer compiler than the default system compiler gcc. See the [installation details article](https://arrow.apache.org/docs/r/articles/developers/install_details.html) for guidance. -- Development versions of arrow are released nightly. For information on how to installl nighhtly builds please see the [installing nightly builds](articles/install_nightly.html) article. +- Development versions of arrow are released nightly. For information on how to install nightly builds please see the [installing nightly builds](https://arrow.apache.org/docs/r/articles/install_nightly.html) article. ## What can the arrow package do? diff --git a/r/_pkgdown.yml index 1ce35d2a546ca..ceb68d773bdb4 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -137,7 +137,6 @@ articles: - developers/workflow - developers/debugging - developers/docker - - developers/writing_bindings - developers/install_details - developers/data_object_layout diff --git a/r/inst/build_arrow_static.sh index d28cbcb08fbec..825a230e78e5e 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -99,6 +99,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON \ -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR} \ -Dre2_SOURCE=${re2_SOURCE:-BUNDLED} \ -Dxsimd_SOURCE=${xsimd_SOURCE:-} \ -Dzstd_SOURCE=${zstd_SOURCE:-} \ diff --git a/r/man/acero.Rd index 365795d9fc65c..9ef9cd7dda6fb 100644 --- a/r/man/acero.Rd +++ b/r/man/acero.Rd @@ -23,7 +23,7 @@ the query on the data.
To run the query, call either \code{compute()}, which returns an \code{arrow} \link{Table}, or \code{collect()}, which pulls the resulting Table into an R \code{tibble}. \itemize{ -\item \code{\link[dplyr:filter-joins]{anti_join()}}: the \code{copy} and \code{na_matches} arguments are ignored +\item \code{\link[dplyr:filter-joins]{anti_join()}}: the \code{copy} argument is ignored \item \code{\link[dplyr:arrange]{arrange()}} \item \code{\link[dplyr:compute]{collapse()}} \item \code{\link[dplyr:compute]{collect()}} @@ -32,22 +32,22 @@ Table into an R \code{tibble}. \item \code{\link[dplyr:distinct]{distinct()}}: \code{.keep_all = TRUE} not supported \item \code{\link[dplyr:explain]{explain()}} \item \code{\link[dplyr:filter]{filter()}} -\item \code{\link[dplyr:mutate-joins]{full_join()}}: the \code{copy} and \code{na_matches} arguments are ignored +\item \code{\link[dplyr:mutate-joins]{full_join()}}: the \code{copy} argument is ignored \item \code{\link[dplyr:glimpse]{glimpse()}} \item \code{\link[dplyr:group_by]{group_by()}} \item \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} \item \code{\link[dplyr:group_data]{group_vars()}} \item \code{\link[dplyr:group_data]{groups()}} -\item \code{\link[dplyr:mutate-joins]{inner_join()}}: the \code{copy} and \code{na_matches} arguments are ignored -\item \code{\link[dplyr:mutate-joins]{left_join()}}: the \code{copy} and \code{na_matches} arguments are ignored -\item \code{\link[dplyr:mutate]{mutate()}}: window functions (e.g. things that require aggregation within groups) not currently supported +\item \code{\link[dplyr:mutate-joins]{inner_join()}}: the \code{copy} argument is ignored +\item \code{\link[dplyr:mutate-joins]{left_join()}}: the \code{copy} argument is ignored +\item \code{\link[dplyr:mutate]{mutate()}} \item \code{\link[dplyr:pull]{pull()}}: the \code{name} argument is not supported; returns an R vector by default but this behavior is deprecated and will return an Arrow \link{ChunkedArray} in a future release. Provide \code{as_vector = TRUE/FALSE} to control this behavior, or set \code{options(arrow.pull_as_vector)} globally. 
\item \code{\link[dplyr:relocate]{relocate()}} \item \code{\link[dplyr:rename]{rename()}} \item \code{\link[dplyr:rename]{rename_with()}} -\item \code{\link[dplyr:mutate-joins]{right_join()}}: the \code{copy} and \code{na_matches} arguments are ignored +\item \code{\link[dplyr:mutate-joins]{right_join()}}: the \code{copy} argument is ignored \item \code{\link[dplyr:select]{select()}} -\item \code{\link[dplyr:filter-joins]{semi_join()}}: the \code{copy} and \code{na_matches} arguments are ignored +\item \code{\link[dplyr:filter-joins]{semi_join()}}: the \code{copy} argument is ignored \item \code{\link[dplyr:explain]{show_query()}} \item \code{\link[dplyr:slice]{slice_head()}}: slicing within groups not supported; Arrow datasets do not have row order, so head is non-deterministic; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating \item \code{\link[dplyr:slice]{slice_max()}}: slicing within groups not supported; \code{with_ties = TRUE} (dplyr default) is not supported; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating diff --git a/r/man/arrow_not_supported.Rd b/r/man/arrow_not_supported.Rd new file mode 100644 index 0000000000000..be6a001fa1fa4 --- /dev/null +++ b/r/man/arrow_not_supported.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr-eval.R +\name{arrow_not_supported} +\alias{arrow_not_supported} +\alias{validation_error} +\title{Helpers to raise classed errors} +\usage{ +arrow_not_supported( + msg, + .actual_msg = paste(msg, "not supported in Arrow"), + ... +) + +validation_error(msg, ...) +} +\arguments{ +\item{msg}{The message to show. \code{arrow_not_supported()} will append +"not supported in Arrow" to this message.} + +\item{.actual_msg}{If you don't want to append "not supported in Arrow" to +the message, you can provide the full message here.} + +\item{...}{Additional arguments to pass to \code{rlang::abort()}. Useful arguments +include \code{call} to provide the call or expression that caused the error, and +\code{body} to provide additional context about the error.} +} +\description{ +\code{arrow_not_supported()} and \code{validation_error()} raise classed errors that +allow us to distinguish between things that are not supported in Arrow and +things that are just invalid input. Additional wrapping in \code{arrow_eval()} +and \code{try_arrow_dplyr()} provide more context and suggestions. +Importantly, if \code{arrow_not_supported} is raised, then retrying the same code +in regular dplyr in R may work. But if \code{validation_error} is raised, then we +shouldn't recommend retrying with regular dplyr because it will fail there +too. +} +\details{ +Use these in function bindings and in the dplyr methods. Inside of function +bindings, you don't need to provide the \code{call} argument, as it will be +automatically filled in with the expression that caused the error in +\code{arrow_eval()}. In dplyr methods, you should provide the \code{call} argument; +\code{rlang::caller_call()} often is correct, but you may need to experiment to +find how far up the call stack you need to look. + +You may provide additional information in the \code{body} argument, a named +character vector. Use \code{i} for additional information about the error and \code{>} +to indicate potential solutions or workarounds that don't require pulling the +data into R. 
If you have an \code{arrow_not_supported()} error with a \code{>} +suggestion, when the error is ultimately raised by \code{try_arrow_dplyr()}, +\verb{Call collect() first to pull data into R} won't be the only suggestion. + +You can still use \code{match.arg()} and \code{assert_that()} for simple input +validation inside of the function bindings. \code{arrow_eval()} will catch their +errors and re-raise them as \code{validation_error}. +} +\keyword{internal} diff --git a/r/man/register_binding.Rd index d10cd733bbe9d..b84cde3b8993a 100644 --- a/r/man/register_binding.Rd +++ b/r/man/register_binding.Rd @@ -2,63 +2,28 @@ % Please edit documentation in R/dplyr-funcs.R \name{register_binding} \alias{register_binding} -\alias{register_binding_agg} \title{Register compute bindings} \usage{ -register_binding( - fun_name, - fun, - registry = nse_funcs, - update_cache = FALSE, - notes = character(0) -) - -register_binding_agg( - fun_name, - agg_fun, - registry = agg_funcs, - notes = character(0) -) +register_binding(fun_name, fun, notes = character(0)) } \arguments{ \item{fun_name}{A string containing a function name in the form \code{"function"} or -\code{"package::function"}. The package name is currently not used but -may be used in the future to allow these types of function calls.} +\code{"package::function"}.} -\item{fun}{A function or \code{NULL} to un-register a previous function. +\item{fun}{A function, or \code{NULL} to un-register a previous function. This function must accept \code{Expression} objects as arguments and return \code{Expression} objects instead of regular R objects.} -\item{registry}{An environment in which the functions should be -assigned.} - -\item{update_cache}{Update .cache$functions at the time of registration. -the default is FALSE because the majority of usage is to register -bindings at package load, after which we create the cache once. The -reason why .cache$functions is needed in addition to nse_funcs for -non-aggregate functions could be revisited...it is currently used -as the data mask in mutate, filter, and aggregate (but not -summarise) because the data mask has to be a list.} - \item{notes}{string for the docs: note any limitations or differences in behavior between the Arrow version and the R function.} - -\item{agg_fun}{An aggregate function or \code{NULL} to un-register a previous -aggregate function. This function must accept \code{Expression} objects as -arguments and return a \code{list()} with components: -\itemize{ -\item \code{fun}: string function name -\item \code{data}: list of 0 or more \code{Expression}s -\item \code{options}: list of function options, as passed to call_function -}} } \value{ The previously registered binding or \code{NULL} if no previously registered function existed. } \description{ -The \code{register_binding()} and \code{register_binding_agg()} functions -are used to populate a list of functions that operate on (and return) +\code{register_binding()} is used to populate a list of functions that operate on +(and return) Expressions. These are the basis for the \code{.data} mask inside dplyr methods.
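To make the slimmed-down API above concrete, here is a hypothetical registration (the function name and body are invented for illustration; register_binding() is an internal helper, and real bindings live in the arrow namespace):

# A binding receives Expressions and must return an Expression; arithmetic
# on Expressions builds an Arrow compute expression rather than evaluating in R.
register_binding(
  "mypkg::add_one",
  function(x) x + Expression$scalar(1L),
  notes = "illustrative example only"
)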
} \section{Writing bindings}{ diff --git a/r/pkgdown/assets/versions.json index 75d179f240515..43f0b3fac62a1 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,10 +1,10 @@ [ { - "name": "16.0.0.9000 (dev)", + "name": "16.1.0.9000 (dev)", "version": "dev/" }, { - "name": "16.0.0 (release)", + "name": "16.1.0 (release)", "version": "" }, { diff --git a/r/src/arrowExports.cpp index a4c4b614d6d75..d5aec50219e0b 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1163,8 +1163,8 @@ extern "C" SEXP _arrow_ExecNode_Aggregate(SEXP input_sexp, SEXP options_sexp, SE // compute-exec.cpp #if defined(ARROW_R_WITH_ACERO) -std::shared_ptr<acero::ExecNode> ExecNode_Join(const std::shared_ptr<acero::ExecNode>& input, acero::JoinType join_type, const std::shared_ptr<acero::ExecNode>& right_data, std::vector<std::string> left_keys, std::vector<std::string> right_keys, std::vector<std::string> left_output, std::vector<std::string> right_output, std::string output_suffix_for_left, std::string output_suffix_for_right); -extern "C" SEXP _arrow_ExecNode_Join(SEXP input_sexp, SEXP join_type_sexp, SEXP right_data_sexp, SEXP left_keys_sexp, SEXP right_keys_sexp, SEXP left_output_sexp, SEXP right_output_sexp, SEXP output_suffix_for_left_sexp, SEXP output_suffix_for_right_sexp){ +std::shared_ptr<acero::ExecNode> ExecNode_Join(const std::shared_ptr<acero::ExecNode>& input, acero::JoinType join_type, const std::shared_ptr<acero::ExecNode>& right_data, std::vector<std::string> left_keys, std::vector<std::string> right_keys, std::vector<std::string> left_output, std::vector<std::string> right_output, std::string output_suffix_for_left, std::string output_suffix_for_right, bool na_matches); +extern "C" SEXP _arrow_ExecNode_Join(SEXP input_sexp, SEXP join_type_sexp, SEXP right_data_sexp, SEXP left_keys_sexp, SEXP right_keys_sexp, SEXP left_output_sexp, SEXP right_output_sexp, SEXP output_suffix_for_left_sexp, SEXP output_suffix_for_right_sexp, SEXP na_matches_sexp){ BEGIN_CPP11 arrow::r::Input<const std::shared_ptr<acero::ExecNode>&>::type input(input_sexp); arrow::r::Input<acero::JoinType>::type join_type(join_type_sexp); @@ -1175,11 +1175,12 @@ BEGIN_CPP11 arrow::r::Input<std::vector<std::string>>::type right_output(right_output_sexp); arrow::r::Input<std::string>::type output_suffix_for_left(output_suffix_for_left_sexp); arrow::r::Input<std::string>::type output_suffix_for_right(output_suffix_for_right_sexp); - return cpp11::as_sexp(ExecNode_Join(input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right)); + arrow::r::Input<bool>::type na_matches(na_matches_sexp); + return cpp11::as_sexp(ExecNode_Join(input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right, na_matches)); END_CPP11 } #else -extern "C" SEXP _arrow_ExecNode_Join(SEXP input_sexp, SEXP join_type_sexp, SEXP right_data_sexp, SEXP left_keys_sexp, SEXP right_keys_sexp, SEXP left_output_sexp, SEXP right_output_sexp, SEXP output_suffix_for_left_sexp, SEXP output_suffix_for_right_sexp){ +extern "C" SEXP _arrow_ExecNode_Join(SEXP input_sexp, SEXP join_type_sexp, SEXP right_data_sexp, SEXP left_keys_sexp, SEXP right_keys_sexp, SEXP left_output_sexp, SEXP right_output_sexp, SEXP output_suffix_for_left_sexp, SEXP output_suffix_for_right_sexp, SEXP na_matches_sexp){ Rf_error("Cannot call ExecNode_Join(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -5790,7 +5791,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, { "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, { "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 3}, - { "_arrow_ExecNode_Join", (DL_FUNC) &_arrow_ExecNode_Join, 9}, + { "_arrow_ExecNode_Join", (DL_FUNC) &_arrow_ExecNode_Join, 10}, { "_arrow_ExecNode_Union", (DL_FUNC) &_arrow_ExecNode_Union, 2}, { "_arrow_ExecNode_Fetch", (DL_FUNC) &_arrow_ExecNode_Fetch, 3}, { "_arrow_ExecNode_OrderBy", (DL_FUNC) &_arrow_ExecNode_OrderBy, 2}, diff --git a/r/src/compute-exec.cpp index e0b3c62c47d7f..d0c50315c299f 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -411,10 +411,17 @@ std::shared_ptr<acero::ExecNode> ExecNode_Join( const std::shared_ptr<acero::ExecNode>& right_data, std::vector<std::string> left_keys, std::vector<std::string> right_keys, std::vector<std::string> left_output, std::vector<std::string> right_output, - std::string output_suffix_for_left, std::string output_suffix_for_right) { + std::string output_suffix_for_left, std::string output_suffix_for_right, + bool na_matches) { std::vector<arrow::FieldRef> left_refs, right_refs, left_out_refs, right_out_refs; + std::vector<acero::JoinKeyCmp> key_cmps; for (auto&& name : left_keys) { left_refs.emplace_back(std::move(name)); + // Populate key_cmps in this loop, one for each key + // Note that Acero supports having different values for each key, but dplyr + // only supports one value for all keys, so we're only going to support that + // for now. + key_cmps.emplace_back(na_matches ? acero::JoinKeyCmp::IS : acero::JoinKeyCmp::EQ); } for (auto&& name : right_keys) { right_refs.emplace_back(std::move(name)); @@ -434,10 +441,11 @@ std::shared_ptr<acero::ExecNode> ExecNode_Join( return MakeExecNodeOrStop( "hashjoin", input->plan(), {input.get(), right_data.get()}, - acero::HashJoinNodeOptions{ - join_type, std::move(left_refs), std::move(right_refs), - std::move(left_out_refs), std::move(right_out_refs), compute::literal(true), - std::move(output_suffix_for_left), std::move(output_suffix_for_right)}); + acero::HashJoinNodeOptions{join_type, std::move(left_refs), std::move(right_refs), + std::move(left_out_refs), std::move(right_out_refs), + std::move(key_cmps), compute::literal(true), + std::move(output_suffix_for_left), + std::move(output_suffix_for_right)}); } // [[acero::export]] diff --git a/r/src/extension-impl.cpp index a13b252b2832f..14c771cc98e4f 100644 --- a/r/src/extension-impl.cpp +++ b/r/src/extension-impl.cpp @@ -87,7 +87,9 @@ arrow::Result<std::shared_ptr<arrow::DataType>> RExtensionType::Deserialize( return std::shared_ptr<arrow::DataType>(cloned.release()); } -std::string RExtensionType::ToString() const { +std::string RExtensionType::ToString() const { return ToString(false); } + +std::string RExtensionType::ToString(bool show_metadata) const { arrow::Result<std::string> result = SafeCallIntoR<std::string>([&]() { cpp11::environment instance = r6_instance(); cpp11::function instance_ToString(instance["ToString"]); @@ -98,7 +100,11 @@ std::string RExtensionType::ToString() const { // In the event of an error (e.g., we are not on the main thread // and we are not inside RunWithCapturedR()), just call the default method if (!result.ok()) { +#if ARROW_VERSION_MAJOR >= 16 + return ExtensionType::ToString(show_metadata); +#else return ExtensionType::ToString(); +#endif } else { return result.ValueUnsafe(); } diff --git a/r/src/extension.h index fbd3ad484691a..6e6c6f7c29761 100644 --- a/r/src/extension.h +++ b/r/src/extension.h @@ -52,6 +52,8 @@ class RExtensionType :
public arrow::ExtensionType { std::string Serialize() const { return extension_metadata_; } + std::string ToString(bool show_metadata = false) const; + // wrapper for libarrow < 16 std::string ToString() const; cpp11::sexp Convert(const std::shared_ptr<arrow::Array>& array) const; diff --git a/r/tests/testthat/_snaps/dataset-dplyr.md new file mode 100644 index 0000000000000..a2d9820a4e78a --- /dev/null +++ b/r/tests/testthat/_snaps/dataset-dplyr.md @@ -0,0 +1,9 @@ +# dplyr method not implemented messages + + Code + ds %>% filter(int > 6, dbl > max(dbl)) + Condition + Error in `dbl > max(dbl)`: + ! Expression not supported in filter() in Arrow + > Call collect() first to pull data into R. + diff --git a/r/tests/testthat/_snaps/dplyr-across.md new file mode 100644 index 0000000000000..47b5bd61b39ce --- /dev/null +++ b/r/tests/testthat/_snaps/dplyr-across.md @@ -0,0 +1,11 @@ +# expand_across correctly expands quosures + + Code + InMemoryDataset$create(example_data) %>% mutate(across(c(dbl, dbl2), round, + digits = -1)) + Condition + Error in `mutate.Dataset()`: + ! `...` argument to `across()` is deprecated in dplyr and not supported in Arrow + > Convert your call into a function or formula including the arguments + > Or, call collect() first to pull data into R. + diff --git a/r/tests/testthat/_snaps/dplyr-eval.md new file mode 100644 index 0000000000000..0b4639f1fe7a7 --- /dev/null +++ b/r/tests/testthat/_snaps/dplyr-eval.md @@ -0,0 +1,27 @@ +# try_arrow_dplyr/abandon_ship adds the right message about collect() + + Code + tester(ds, i) + Condition + Error in `validation_error()`: + ! arg is 0 + +--- + + Code + tester(ds, i) + Condition + Error in `arrow_not_supported()`: + ! arg == 1 not supported in Arrow + > Call collect() first to pull data into R. + +--- + + Code + tester(ds, i) + Condition + Error in `arrow_not_supported()`: + ! arg greater than 0 not supported in Arrow + > Try setting arg to -1 + > Or, call collect() first to pull data into R. + diff --git a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md new file mode 100644 index 0000000000000..036c8b49e80a0 --- /dev/null +++ b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md @@ -0,0 +1,11 @@ +# `as.Date()` and `as_date()` + + Code + test_df %>% InMemoryDataset$create() %>% transmute(date_char_ymd = as.Date( + character_ymd_var, tryFormats = c("%Y-%m-%d", "%Y/%m/%d"))) %>% collect() + Condition + Error in `as.Date()`: + ! `as.Date()` with multiple `tryFormats` not supported in Arrow + > Consider using the lubridate specialised parsing functions `ymd()`, `ymd()`, etc. + > Or, call collect() first to pull data into R.
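The snapshots above show how the new pieces compose: a binding raises arrow_not_supported() with optional "> " suggestions in body, arrow_eval() attaches the failing expression as the call, and try_arrow_dplyr() appends the collect() hint, prefixed with "Or," when another suggestion already exists. A condensed sketch of the check behind the as.Date() snapshot, following the pattern in this patch (surrounding binding code omitted):

# Inside a binding body evaluated by arrow_eval():
if (length(tryFormats) > 1) {
  arrow_not_supported(
    "`as.Date()` with multiple `tryFormats`",
    body = c(">" = "Consider using the lubridate specialised parsing functions `ymd()`, etc.")
  )
}
# On a Dataset this surfaces as an error ending in
# "> Or, call collect() first to pull data into R."; on a Table or
# RecordBatch it becomes a warning and the query re-runs in regular dplyr.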
+ diff --git a/r/tests/testthat/_snaps/dplyr-mutate.md new file mode 100644 index 0000000000000..a5bbc0163bc4f --- /dev/null +++ b/r/tests/testthat/_snaps/dplyr-mutate.md @@ -0,0 +1,25 @@ +# transmute() defuses dots arguments (ARROW-13262) + + Code + tbl %>% Table$create() %>% transmute(a = stringr::str_c(padded_strings, + padded_strings), b = stringr::str_squish(a)) %>% collect() + Condition + Warning: + In stringr::str_squish(a): + i Expression not supported in Arrow + > Pulling data into R + Output + # A tibble: 10 x 2 + a b + <chr> <chr> + 1 " a a " a a + 2 " b b " b b + 3 " c c " c c + 4 " d d " d d + 5 " e e " e e + 6 " f f " f f + 7 " g g " g g + 8 " h h " h h + 9 " i i " i i + 10 " j j " j j + diff --git a/r/tests/testthat/_snaps/dplyr-query.md index a9d4da26cca0b..cf5ac594acb2b 100644 --- a/r/tests/testthat/_snaps/dplyr-query.md +++ b/r/tests/testthat/_snaps/dplyr-query.md @@ -1,4 +1,6 @@ # Scalars in expressions match the type of the field, if possible - Expression int == "5" not supported in Arrow; pulling data into R + In int == "5": + i Expression not supported in Arrow + > Pulling data into R diff --git a/r/tests/testthat/_snaps/dplyr-summarize.md index bbb8e64bfe790..449a194d68fe5 100644 --- a/r/tests/testthat/_snaps/dplyr-summarize.md +++ b/r/tests/testthat/_snaps/dplyr-summarize.md @@ -3,11 +3,44 @@ Code InMemoryDataset$create(tbl) %>% summarize(distinct = n_distinct()) Condition - Error: - ! Error : In n_distinct(), n_distinct() with 0 arguments not supported in Arrow - Call collect() first to pull data into R. + Error in `n_distinct()`: + ! n_distinct() with 0 arguments not supported in Arrow + > Call collect() first to pull data into R. --- - Error : In n_distinct(int, lgl), Multiple arguments to n_distinct() not supported in Arrow; pulling data into R + In n_distinct(int, lgl): + i Multiple arguments to n_distinct() not supported in Arrow + > Pulling data into R + +# Expressions on aggregations + + Code + record_batch(tbl) %>% summarise(any(any(lgl))) + Condition + Warning: + In any(any(lgl)): + i aggregate within aggregate expression not supported in Arrow + > Pulling data into R + Output + # A tibble: 1 x 1 + `any(any(lgl))` + <lgl> + 1 TRUE + +# Can use across() within summarise() + + Code + data.frame(x = 1, y = 2) %>% arrow_table() %>% group_by(x) %>% summarise(across( + everything())) %>% collect() + Condition + Warning: + In y: + i Expression is not a valid aggregation expression or is not supported in Arrow + > Pulling data into R + Output + # A tibble: 1 x 2 + x y + <dbl> <dbl> + 1 1 2 diff --git a/r/tests/testthat/helper-expectation.R index 090ed36aa7f94..63d0163aa3129 100644 --- a/r/tests/testthat/helper-expectation.R +++ b/r/tests/testthat/helper-expectation.R @@ -88,7 +88,7 @@ compare_dplyr_binding <- function(expr, tbl, warning = NA, ...) { if (isTRUE(warning)) { # Special-case the simple warning: - warning <- "not supported in Arrow; pulling data into R" + warning <- "> Pulling data into R" } # Evaluate `expr` on a Table object and compare with `expected` @@ -289,3 +289,8 @@ split_vector_as_list <- function(vec) { expect_across_equal <- function(across_expr, expected, tbl) { expect_identical(expand_across(as_adq(tbl), across_expr), new_quosures(expected)) } + +expect_arrow_eval_error <- function(expr, ..., .data = example_data) { + mask <- arrow_mask(as_adq(.data)) + expect_error(arrow_eval({{ expr }}, mask), ...)
+} diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index bb005605de318..98068bdea20d5 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -818,11 +818,6 @@ test_that("Handling string data with embedded nuls", { ) array_with_nul <- arrow_array(raws)$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") - # no error on conversion, because altrep laziness v <- expect_error(as.vector(array_with_nul), NA) diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index f29b75dbf4095..5987f5a4b7c17 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -595,14 +595,10 @@ test_that("RecordBatch supports cbind", { ) # Rejects Table and ChunkedArray arguments - if (getRversion() >= "4.0.0") { - # R 3.6 cbind dispatch rules cause cbind to fall back to default impl if - # there are multiple arguments with distinct cbind implementations - expect_error( - cbind(record_batch(a = 1:2), arrow_table(b = 3:4)), - regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays" - ) - } + expect_error( + cbind(record_batch(a = 1:2), arrow_table(b = 3:4)), + regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays" + ) expect_error( cbind(record_batch(a = 1:2), b = chunked_array(1, 2)), regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays" @@ -622,10 +618,6 @@ test_that("Handling string data with embedded nuls", { batch_with_nul <- record_batch(a = 1:5, b = raws) batch_with_nul$b <- batch_with_nul$b$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") df <- as.data.frame(batch_with_nul) expect_error( diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index ce3254a158eee..f6cec3b2b7683 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -581,10 +581,6 @@ test_that("Table supports cbind", { }) test_that("cbind.Table handles record batches and tables", { - # R 3.6 cbind dispatch rules cause cbind to fall back to default impl if - # there are multiple arguments with distinct cbind implementations - skip_if(getRversion() < "4.0.0", "R 3.6 cbind dispatch rules prevent this behavior") - expect_equal( cbind(arrow_table(a = 1L:2L), record_batch(b = 4:5)), arrow_table(a = 1L:2L, b = 4:5) diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R index 7a66d0e778282..50bd40988e550 100644 --- a/r/tests/testthat/test-altrep.R +++ b/r/tests/testthat/test-altrep.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. 
-skip_on_r_older_than("3.6") - test_that("altrep test functions do not include base altrep", { expect_false(is_arrow_altrep(1:10)) expect_identical(test_arrow_altrep_is_materialized(1:10), NA) @@ -373,6 +371,11 @@ test_that("altrep min/max/sum identical to R versions for double", { expect_altrep_roundtrip(x, max) expect_altrep_roundtrip(x, sum) + # On valgrind the NA_real_ is sometimes transformed to NaN + # https://stat.ethz.ch/pipermail/r-devel/2021-April/080683.html + # so we skip these there to avoid complicated NA == NaN logic, + # and they are tested on a number of other platforms / conditions + skip_on_linux_devel() x <- c(1, 2, NA_real_) expect_altrep_roundtrip(x, min, na.rm = TRUE) expect_altrep_roundtrip(x, max, na.rm = TRUE) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 223f5022d3b94..bb01df427f713 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -475,11 +475,6 @@ test_that("Handling string data with embedded nuls", { ) chunked_array_with_nul <- ChunkedArray$create(raws)$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") - v <- expect_error(as.vector(chunked_array_with_nul), NA) expect_error( diff --git a/r/tests/testthat/test-dataset-dplyr.R b/r/tests/testthat/test-dataset-dplyr.R index b8d93841921d7..d5c8dc9820a88 100644 --- a/r/tests/testthat/test-dataset-dplyr.R +++ b/r/tests/testthat/test-dataset-dplyr.R @@ -163,17 +163,6 @@ See $.data for the source Arrow object", ) }) -test_that("mutate() features not yet implemented", { - ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) - expect_error( - ds %>% - group_by(int) %>% - mutate(avg = mean(int)), - "window functions not currently supported in Arrow\nCall collect() first to pull data into R.", - fixed = TRUE - ) -}) - test_that("filter scalar validation doesn't crash (ARROW-7772)", { ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_error( @@ -334,10 +323,9 @@ test_that("head/tail on query on dataset", { test_that("dplyr method not implemented messages", { ds <- open_dataset(dataset_dir) # This one is more nuanced - expect_error( + expect_snapshot( ds %>% filter(int > 6, dbl > max(dbl)), - "Filter expression not supported for Arrow Datasets: dbl > max(dbl)\nCall collect() first to pull data into R.", - fixed = TRUE + error = TRUE ) }) diff --git a/r/tests/testthat/test-dplyr-across.R b/r/tests/testthat/test-dplyr-across.R index 32476bab06fce..cfdad9a1f4c05 100644 --- a/r/tests/testthat/test-dplyr-across.R +++ b/r/tests/testthat/test-dplyr-across.R @@ -117,13 +117,11 @@ test_that("expand_across correctly expands quosures", { ) # ellipses (...) 
are a deprecated argument - expect_error( - expand_across( - as_adq(example_data), - quos(across(c(dbl, dbl2), round, digits = -1)) - ), - regexp = "`...` argument to `across()` is deprecated in dplyr and not supported in Arrow", - fixed = TRUE + # abandon_ship message offers multiple suggestions + expect_snapshot( + InMemoryDataset$create(example_data) %>% + mutate(across(c(dbl, dbl2), round, digits = -1)), + error = TRUE ) # alternative ways of specifying .fns - as a list diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R index a8aa5556f1e0d..f658c531e78b5 100644 --- a/r/tests/testthat/test-dplyr-collapse.R +++ b/r/tests/testthat/test-dplyr-collapse.R @@ -168,33 +168,10 @@ total: int64 extra: int64 (multiply_checked(total, 5)) * Sorted by lgl [asc] -See $.data for the source Arrow object", - fixed = TRUE - ) - expect_output( - print(q$.data), - "Table (query) -int: int32 -lgl: bool - -* Aggregations: -total: sum(int) -* Filter: (dbl > 2) -* Grouped by lgl See $.data for the source Arrow object", fixed = TRUE ) - skip_if(getRversion() < "3.6.0", "TODO investigate why these aren't equal") - # On older R versions: - # ── Failure (test-dplyr-collapse.R:172:3): Properties of collapsed query ──────── - # head(q, 1) %>% collect() not equal to tibble::tibble(lgl = FALSE, total = 8L, extra = 40). - # Component "total": Mean relative difference: 0.3846154 - # Component "extra": Mean relative difference: 0.3846154 - # ── Failure (test-dplyr-collapse.R:176:3): Properties of collapsed query ──────── - # tail(q, 1) %>% collect() not equal to tibble::tibble(lgl = NA, total = 25L, extra = 125). - # Component "total": Mean relative difference: 0.9230769 - # Component "extra": Mean relative difference: 0.9230769 expect_equal( q %>% arrange(lgl) %>% diff --git a/r/tests/testthat/test-dplyr-eval.R b/r/tests/testthat/test-dplyr-eval.R new file mode 100644 index 0000000000000..16c56f28cdbbf --- /dev/null +++ b/r/tests/testthat/test-dplyr-eval.R @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
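The new test file below exercises the distinction that drives the fallback behavior: validation_error() flags input that would fail in regular dplyr too (so no collect() suggestion is added), while arrow_not_supported() flags valid dplyr code that Arrow cannot run. Because both are classed conditions, callers can branch on them; a small sketch (hypothetical handlers, shown only to illustrate the classes):

mask <- arrow_mask(as_adq(example_data))
tryCatch(
  arrow_eval(rlang::quo(match.arg("z", c("a", "b"))), mask),
  # would fail in regular dplyr too, so no point suggesting collect()
  validation_error = function(e) "invalid input",
  # valid dplyr code that Arrow can't run; collect() and retrying may work
  arrow_not_supported = function(e) "not supported in Arrow"
)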
+ +test_that("various paths in arrow_eval", { + expect_arrow_eval_error( + assert_is(1, "character"), + class = "validation_error" + ) + expect_arrow_eval_error( + NoTaVaRiAbLe, + class = "validation_error" + ) + expect_arrow_eval_error( + match.arg("z", c("a", "b")), + class = "validation_error" + ) + expect_arrow_eval_error( + stop("something something NotImplementedError"), + class = "arrow_not_supported" + ) +}) + +test_that("try_arrow_dplyr/abandon_ship adds the right message about collect()", { + tester <- function(.data, arg) { + try_arrow_dplyr({ + if (arg == 0) { + # This one just stops and doesn't recommend calling collect() + validation_error("arg is 0") + } else if (arg == 1) { + # This one recommends calling collect() + arrow_not_supported("arg == 1") + } else { + # Because this one has an alternative suggested, it adds "Or, collect()" + arrow_not_supported( + "arg greater than 0", + body = c(">" = "Try setting arg to -1") + ) + } + }) + } + + ds <- InMemoryDataset$create(arrow_table(x = 1)) + for (i in 0:2) { + expect_snapshot(tester(ds, i), error = TRUE) + } +}) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index bf23685362a82..ba086133dcaf4 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -317,20 +317,28 @@ test_that("Filtering with unsupported functions", { filter(int > 2, pnorm(dbl) > .99) %>% collect(), tbl, - warning = "Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow; pulling data into R" + warning = paste( + "In pnorm\\(dbl\\) > 0.99: ", + "i Expression not supported in Arrow", + "> Pulling data into R", + sep = "\n" + ) ) compare_dplyr_binding( .input %>% filter( nchar(chr, type = "bytes", allowNA = TRUE) == 1, # bad, Arrow msg int > 2, # good - pnorm(dbl) > .99 # bad, opaque + pnorm(dbl) > .99 # bad, opaque, but we'll error on the first one before we get here ) %>% collect(), tbl, - warning = '\\* In nchar\\(chr, type = "bytes", allowNA = TRUE\\) == 1, allowNA = TRUE not supported in Arrow -\\* Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow -pulling data into R' + warning = paste( + 'In nchar\\(chr, type = "bytes", allowNA = TRUE\\) == 1: ', + "i allowNA = TRUE not supported in Arrow", + "> Pulling data into R", + sep = "\n" + ) ) }) @@ -467,7 +475,12 @@ test_that(".by argument", { filter(int > 2, pnorm(dbl) > .99, .by = chr) %>% collect(), tbl, - warning = "Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow; pulling data into R" + warning = paste( + "In pnorm\\(dbl\\) > 0.99: ", + "i Expression not supported in Arrow", + "> Pulling data into R", + sep = "\n" + ) ) expect_error( tbl %>% diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index 3ea1853fec455..d90dc827b40d5 100644 --- a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -248,75 +248,50 @@ test_that("case_when()", { ) ) - # expected errors (which are caught by abandon_ship() and changed to warnings) - # TODO: Find a way to test these directly without abandon_ship() interfering - expect_error( - # no cases - expect_warning( - tbl %>% - Table$create() %>% - transmute(cw = case_when()), - "case_when" - ) - ) - expect_error( - # argument not a formula - expect_warning( - tbl %>% - Table$create() %>% - transmute(cw = case_when(TRUE ~ FALSE, TRUE)), - "case_when" - ) - ) - expect_error( - # non-logical R scalar on left side of formula - expect_warning( - tbl %>% - Table$create() %>% - 
transmute(cw = case_when(0L ~ FALSE, TRUE ~ FALSE)), - "case_when" - ) - ) - expect_error( + # validation errors + expect_arrow_eval_error( + case_when(), + "No cases provided", + class = "validation_error" + ) + expect_arrow_eval_error( + case_when(TRUE ~ FALSE, TRUE), + "Each argument to case_when\\(\\) must be a two-sided formula", + class = "validation_error" + ) + expect_arrow_eval_error( + case_when(0L ~ FALSE, TRUE ~ FALSE), + "Left side of each formula in case_when\\(\\) must be a logical expression", + class = "validation_error" + ) + expect_arrow_eval_error( # non-logical Arrow column reference on left side of formula - expect_warning( - tbl %>% - Table$create() %>% - transmute(cw = case_when(int ~ FALSE)), - "case_when" - ) + case_when(int ~ FALSE), + "Left side of each formula in case_when\\(\\) must be a logical expression", + class = "validation_error" ) - expect_error( - # non-logical Arrow expression on left side of formula - expect_warning( - tbl %>% - Table$create() %>% - transmute(cw = case_when(dbl + 3.14159 ~ TRUE)), - "case_when" - ) + expect_arrow_eval_error( + # non-logical Arrow expression on left side of formula + case_when(dbl + 3.14159 ~ TRUE), + "Left side of each formula in case_when\\(\\) must be a logical expression", + class = "validation_error" ) - - expect_error( - expect_warning( - tbl %>% - arrow_table() %>% - mutate(cw = case_when(int > 5 ~ 1, .default = c(0, 1))) - ), - "`.default` must have size" + expect_arrow_eval_error( + case_when(int > 5 ~ 1, .default = c(0, 1)), + "`.default` must have size 1, not size 2", + class = "validation_error" ) - expect_warning( - tbl %>% - arrow_table() %>% - mutate(cw = case_when(int > 5 ~ 1, .ptype = integer())), - "not supported in Arrow" + expect_arrow_eval_error( + case_when(int > 5 ~ 1, .ptype = integer()), + "`case_when\\(\\)` with `.ptype` specified not supported in Arrow", + class = "arrow_not_supported" ) - expect_warning( - tbl %>% - arrow_table() %>% - mutate(cw = case_when(int > 5 ~ 1, .size = 10)), - "not supported in Arrow" + expect_arrow_eval_error( + case_when(int > 5 ~ 1, .size = 10), + "`case_when\\(\\)` with `.size` specified not supported in Arrow", + class = "arrow_not_supported" ) compare_dplyr_binding( @@ -500,9 +475,9 @@ test_that("coalesce()", { ) # no arguments - expect_error( - call_binding("coalesce"), - "At least one argument must be supplied to coalesce()", - fixed = TRUE + expect_arrow_eval_error( + coalesce(), + "At least one argument must be supplied to coalesce\\(\\)", + class = "validation_error" ) }) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R index 4d3226798d3ff..0e4d2f3656a43 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -180,7 +180,7 @@ test_that("strptime", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( @@ -198,7 +198,7 @@ test_that("strptime works for individual formats", { skip_on_cran() # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows
with R 3.6) + # RE2 library skip_if_not_available("re2") tz <- "Pacific/Marquesas" @@ -291,7 +291,9 @@ test_that("timestamp round trip correctly via strftime and strptime", { # Some formats are not supported on Windows if (!tolower(Sys.info()[["sysname"]]) == "windows") { - formats <- c(formats, "%a", "%A", "%b", "%B", "%OS", "%I%p", "%r", "%T%z") + # "%r" could also be here, though it is only valid in some locales (those + # that use 12 hour formats, so skip for now) + formats <- c(formats, "%a", "%A", "%b", "%B", "%OS", "%I%p", "%T%z") } for (fmt in formats) { @@ -1884,34 +1886,18 @@ test_that("`as.Date()` and `as_date()`", { ) # we do not support multiple tryFormats - # this is not a simple warning, therefore we cannot use compare_dplyr_binding() - # with `warning = TRUE` - # arrow_table test - expect_warning( - test_df %>% - arrow_table() %>% - mutate( - date_char_ymd = as.Date( - character_ymd_var, - tryFormats = c("%Y-%m-%d", "%Y/%m/%d") - ) - ) %>% - collect(), - regexp = "Consider using the lubridate specialised parsing functions" - ) - - # record batch test - expect_warning( + # Use a dataset to test the alternative suggestion message + expect_snapshot( test_df %>% - record_batch() %>% - mutate( + InMemoryDataset$create() %>% + transmute( date_char_ymd = as.Date( character_ymd_var, tryFormats = c("%Y-%m-%d", "%Y/%m/%d") ) ) %>% collect(), - regexp = "Consider using the lubridate specialised parsing functions" + error = TRUE ) # strptime does not support a partial format - Arrow returns NA, while @@ -2080,7 +2066,7 @@ test_that("as_datetime() works with other functions", { test_that("parse_date_time() works with year, month, and date components", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -2139,7 +2125,7 @@ test_that("parse_date_time() works with year, month, and date components", { test_that("parse_date_time() works with a mix of formats and orders", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") test_df <- tibble( string_combi = c("2021-09-1", "2/09//2021", "09.3.2021") @@ -2169,7 +2155,7 @@ test_that("year, month, day date/time parsers", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -2221,7 +2207,7 @@ test_that("ym, my & yq parsers", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -2270,7 +2256,7 @@ test_that("ym, my & yq parsers", { test_that("parse_date_time's other formats", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( @@ -2401,7 +2387,7 @@ test_that("lubridate's fast_strptime", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( @@ -2508,7 +2494,7 @@ test_that("parse_date_time with hours, minutes and seconds components", { # the unseparated strings are 
versions of "1987-08-22 20:13:59" (with %y) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( @@ -2638,7 +2624,7 @@ test_that("parse_date_time with month names and HMS", { skip_on_os("windows") # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6 & the minimal nightly builds) + # RE2 library (not available in the minimal nightly builds) skip_if_not_available("re2") test_dates_times2 <- tibble( @@ -2737,7 +2723,7 @@ test_that("parse_date_time with `quiet = FALSE` not supported", { # https://issues.apache.org/jira/browse/ARROW-17146 # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6 & the minimal nightly builds) + # RE2 library (not available in the minimal nightly builds) skip_if_not_available("re2") expect_warning( @@ -2766,7 +2752,7 @@ test_that("parse_date_time with `quiet = FALSE` not supported", { test_that("parse_date_time with truncated formats", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") test_truncation_df <- tibble( @@ -2853,7 +2839,7 @@ test_that("parse_date_time with `exact = TRUE`, and with regular R objects", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -3124,11 +3110,9 @@ test_that("timestamp round/floor/ceiling works for a minimal test", { }) test_that("timestamp round/floor/ceiling accepts period unit abbreviation", { - # test helper to ensure standard abbreviations of period names # are understood by arrow and mirror the lubridate behaviour check_period_abbreviation <- function(unit, synonyms) { - # check arrow against lubridate compare_dplyr_binding( .input %>% @@ -3253,7 +3237,6 @@ test_that("timestamp round/floor/ceil works for units: month/quarter/year", { # check helper invoked when we need to avoid the lubridate rounding bug check_date_rounding_1051_bypass <- function(data, unit, ignore_attr = TRUE, ...) { - # directly compare arrow to lubridate for floor and ceiling compare_dplyr_binding( .input %>% @@ -3286,7 +3269,6 @@ check_date_rounding_1051_bypass <- function(data, unit, ignore_attr = TRUE, ...) } test_that("date round/floor/ceil works for units: month/quarter/year", { - # these test cases are affected by lubridate issue 1051 so we bypass # lubridate::round_date() for Date objects with large rounding units # https://github.com/tidyverse/lubridate/issues/1051 @@ -3346,7 +3328,6 @@ test_that("timestamp round/floor/ceil works for week units (non-standard week_st }) check_date_week_rounding <- function(data, week_start, ignore_attr = TRUE, ...) { - # directly compare arrow to lubridate for floor and ceiling compare_dplyr_binding( .input %>% @@ -3393,7 +3374,6 @@ test_that("date round/floor/ceil works for week units (non-standard week_start)" # ceiling_date behaves identically to the lubridate version. It takes # unit as an argument to run tests separately for different rounding units check_boundary_with_unit <- function(unit, ...) 
{ - # timestamps compare_dplyr_binding( .input %>% @@ -3462,7 +3442,6 @@ test_that("temporal round/floor/ceil period unit maxima are enforced", { # results. this test helper runs that test, skipping cases where lubridate # produces incorrect answers check_timezone_rounding_vs_lubridate <- function(data, unit) { - # esoteric lubridate bug: on windows and macOS (not linux), lubridate returns # incorrect ceiling/floor for timezoned POSIXct times (syd, adl, kat zones, # but not mar) but not utc, and not for round, and only for these two @@ -3700,8 +3679,8 @@ test_that("with_tz() and force_tz() works", { mutate(timestamps = force_tz( timestamps, "Europe/Brussels", - roll_dst = "post") - ) %>% + roll_dst = "post" + )) %>% collect(), "roll_dst` value must be 'error' or 'boundary' for nonexistent times" ) @@ -3710,11 +3689,10 @@ test_that("with_tz() and force_tz() works", { tibble::tibble(timestamps = nonexistent) %>% arrow_table() %>% mutate(timestamps = force_tz( - timestamps, - "Europe/Brussels", - roll_dst = c("boundary", "NA") - ) - ) %>% + timestamps, + "Europe/Brussels", + roll_dst = c("boundary", "NA") + )) %>% collect(), "`roll_dst` value must be 'error', 'pre', or 'post' for nonexistent times" ) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 039220b88ee00..cb1d4675058b6 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -172,27 +172,31 @@ test_that("paste, paste0, and str_c", { # expected errors # collapse argument not supported - expect_error( - call_binding("paste", x, y, collapse = ""), - "collapse" + expect_arrow_eval_error( + paste(chr, int, collapse = ""), + "`collapse` argument not supported in Arrow", + class = "arrow_not_supported" ) - expect_error( - call_binding("paste0", x, y, collapse = ""), - "collapse" + expect_arrow_eval_error( + paste0(chr, int, collapse = ""), + "`collapse` argument not supported in Arrow", + class = "arrow_not_supported" ) - expect_error( - call_binding("str_c", x, y, collapse = ""), - "collapse" + expect_arrow_eval_error( + str_c(chr, int, collapse = ""), + "`collapse` argument not supported in Arrow", + class = "arrow_not_supported" ) - # literal vectors of length != 1 not supported - expect_error( - call_binding("paste", x, character(0), y), - "Literal vectors of length != 1 not supported in string concatenation" + expect_arrow_eval_error( + paste(chr, character(0), int), + "Literal vectors of length != 1 in string concatenation not supported in Arrow", + class = "arrow_not_supported" ) - expect_error( - call_binding("paste", x, c(",", ";"), y), - "Literal vectors of length != 1 not supported in string concatenation" + expect_arrow_eval_error( + paste(chr, c(",", ";"), int), + "Literal vectors of length != 1 in string concatenation not supported in Arrow", + class = "arrow_not_supported" ) }) @@ -602,10 +606,15 @@ test_that("str_to_lower, str_to_upper, and str_to_title", { ) # Error checking a single function because they all use the same code path. 
- expect_error( - call_binding("str_to_lower", "Apache Arrow", locale = "sp"), - "Providing a value for 'locale' other than the default ('en') is not supported in Arrow", - fixed = TRUE + expect_arrow_eval_error( + str_to_lower("Apache Arrow", locale = "sp"), + paste( + "Providing a value for 'locale' other than the default ('en') not supported in Arrow", + "> To change locale, use 'Sys.setlocale()'", + sep = "\n" + ), + fixed = TRUE, + class = "arrow_not_supported" ) }) @@ -1041,14 +1050,15 @@ test_that("substr with string()", { df ) - expect_error( - call_binding("substr", "Apache Arrow", c(1, 2), 3), - "`start` must be length 1 - other lengths are not supported in Arrow" + expect_arrow_eval_error( + substr("Apache Arrow", c(1, 2), 3), + "`start` must be length 1 - other lengths not supported in Arrow", + class = "arrow_not_supported" ) - - expect_error( - call_binding("substr", "Apache Arrow", 1, c(2, 3)), - "`stop` must be length 1 - other lengths are not supported in Arrow" + expect_arrow_eval_error( + substr("Apache Arrow", 1, c(2, 3)), + "`stop` must be length 1 - other lengths not supported in Arrow", + class = "arrow_not_supported" ) }) @@ -1169,14 +1179,15 @@ test_that("str_sub", { df ) - expect_error( - call_binding("str_sub", "Apache Arrow", c(1, 2), 3), - "`start` must be length 1 - other lengths are not supported in Arrow" + expect_arrow_eval_error( + str_sub("Apache Arrow", c(1, 2), 3), + "`start` must be length 1 - other lengths not supported in Arrow", + class = "arrow_not_supported" ) - - expect_error( - call_binding("str_sub", "Apache Arrow", 1, c(2, 3)), - "`end` must be length 1 - other lengths are not supported in Arrow" + expect_arrow_eval_error( + str_sub("Apache Arrow", 1, c(2, 3)), + "`end` must be length 1 - other lengths not supported in Arrow", + class = "arrow_not_supported" ) }) diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 2624e16156bce..ecb6b3b7b45b6 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -754,11 +754,10 @@ test_that("structs/nested data frames/tibbles can be created", { ) # check that data.frame is mapped too - # stringsAsFactors default is TRUE in R 3.6, which is still tested on CI compare_dplyr_binding( .input %>% transmute( - df_col = data.frame(regular_col1, regular_col2, stringsAsFactors = FALSE) + df_col = data.frame(regular_col1, regular_col2) ) %>% collect() %>% mutate(df_col = as.data.frame(df_col)), diff --git a/r/tests/testthat/test-dplyr-funcs.R b/r/tests/testthat/test-dplyr-funcs.R index 039604a85ee0c..48c5d730f8493 100644 --- a/r/tests/testthat/test-dplyr-funcs.R +++ b/r/tests/testthat/test-dplyr-funcs.R @@ -19,35 +19,25 @@ skip_on_cran() test_that("register_binding()/unregister_binding() works", { - fake_registry <- new.env(parent = emptyenv()) fun1 <- function() NULL fun2 <- function() "Hello" - expect_null(register_binding("some.pkg::some_fun", fun1, fake_registry)) - expect_identical(fake_registry$some_fun, fun1) - expect_identical(fake_registry$`some.pkg::some_fun`, fun1) + expect_null(register_binding("some.pkg::some_fun", fun1)) + expect_identical(.cache$functions$some_fun, fun1) + expect_identical(.cache$functions$`some.pkg::some_fun`, fun1) - expect_identical(unregister_binding("some.pkg::some_fun", fake_registry), fun1) - expect_false("some.pkg::some_fun" %in% names(fake_registry)) - expect_false("some_fun" %in% names(fake_registry)) + expect_identical(unregister_binding("some.pkg::some_fun"), fun1) + 
expect_false("some.pkg::some_fun" %in% names(.cache$functions)) + expect_false("some_fun" %in% names(.cache$functions)) - expect_null(register_binding("somePkg::some_fun", fun1, fake_registry)) - expect_identical(fake_registry$some_fun, fun1) + expect_null(register_binding("somePkg::some_fun", fun1)) + expect_identical(.cache$functions$some_fun, fun1) expect_warning( - register_binding("some.pkg2::some_fun", fun2, fake_registry), + register_binding("some.pkg2::some_fun", fun2), "A \"some_fun\" binding already exists in the registry and will be overwritten." ) # No warning when an identical function is re-registered - expect_silent(register_binding("some.pkg2::some_fun", fun2, fake_registry)) -}) - -test_that("register_binding_agg() works", { - fake_registry <- new.env(parent = emptyenv()) - fun1 <- function() NULL - - expect_null(register_binding_agg("somePkg::some_fun", fun1, fake_registry)) - expect_identical(fake_registry$some_fun, fun1) - expect_identical(fake_registry$`somePkg::some_fun`, fun1) + expect_silent(register_binding("some.pkg2::some_fun", fun2)) }) diff --git a/r/tests/testthat/test-dplyr-glimpse.R b/r/tests/testthat/test-dplyr-glimpse.R index c93273bdeef34..d39fef9e82cca 100644 --- a/r/tests/testthat/test-dplyr-glimpse.R +++ b/r/tests/testthat/test-dplyr-glimpse.R @@ -15,11 +15,6 @@ # specific language governing permissions and limitations # under the License. -# The glimpse output for tests with `example_data` is different on R < 3.6 -# because the `lgl` column is generated with `sample()` and the RNG -# algorithm is different in older R versions. -skip_on_r_older_than("3.6") - library(dplyr, warn.conflicts = FALSE) test_that("glimpse() Table/ChunkedArray", { diff --git a/r/tests/testthat/test-dplyr-join.R b/r/tests/testthat/test-dplyr-join.R index e3e1e98cfca15..9a1c8b7b80fea 100644 --- a/r/tests/testthat/test-dplyr-join.R +++ b/r/tests/testthat/test-dplyr-join.R @@ -441,3 +441,35 @@ test_that("full joins handle keep", { small_dataset_df ) }) + +left <- tibble::tibble( + x = c(1, NA, 3), +) +right <- tibble::tibble( + x = c(1, NA, 3), + y = c("a", "b", "c") +) +na_matches_na <- right +na_matches_never <- tibble::tibble( + x = c(1, NA, 3), + y = c("a", NA, "c") +) +test_that("na_matches argument to join: na (default)", { + expect_equal( + arrow_table(left) %>% + left_join(right, by = "x", na_matches = "na") %>% + arrange(x) %>% + collect(), + na_matches_na %>% arrange(x) + ) +}) + +test_that("na_matches argument to join: never", { + expect_equal( + arrow_table(left) %>% + left_join(right, by = "x", na_matches = "never") %>% + arrange(x) %>% + collect(), + na_matches_never %>% arrange(x) + ) +}) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 0889fffedd508..fa13c151b14e3 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -152,16 +152,14 @@ test_that("transmute() with unsupported arguments", { }) test_that("transmute() defuses dots arguments (ARROW-13262)", { - expect_warning( + expect_snapshot( tbl %>% Table$create() %>% transmute( a = stringr::str_c(padded_strings, padded_strings), b = stringr::str_squish(a) ) %>% - collect(), - "Expression stringr::str_squish(a) not supported in Arrow; pulling data into R", - fixed = TRUE + collect() ) }) @@ -202,10 +200,7 @@ test_that("nchar() arguments", { filter(line_lengths > 15) %>% collect(), tbl, - warning = paste0( - "In nchar\\(verses, type = \"bytes\", allowNA = TRUE\\), ", - "allowNA = TRUE not supported in Arrow; pulling data into R" - 
) + warning = "allowNA = TRUE not supported in Arrow" ) }) @@ -378,18 +373,16 @@ test_that("dplyr::mutate's examples", { # The mutate operation may yield different results on grouped # tibbles because the expressions are computed within groups. # The following normalises `mass` by the global average: - # TODO(ARROW-13926): support window functions compare_dplyr_binding( .input %>% select(name, mass, species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) %>% collect(), - starwars, - warning = "window function" + starwars ) }) -test_that("Can mutate after group_by as long as there are no aggregations", { +test_that("Can mutate after group_by, including with some aggregations", { compare_dplyr_binding( .input %>% select(int, chr) %>% @@ -417,31 +410,31 @@ test_that("Can mutate after group_by as long as there are no aggregations", { collect(), tbl ) - expect_warning( - tbl %>% - Table$create() %>% + compare_dplyr_binding( + .input %>% select(int, chr) %>% group_by(chr) %>% mutate(avg_int = mean(int)) %>% + # Because this silently does a join, the rows can get unsorted + arrange(chr) %>% collect(), - "window functions not currently supported in Arrow; pulling data into R", - fixed = TRUE + tbl ) - expect_warning( - tbl %>% - Table$create() %>% + compare_dplyr_binding( + .input %>% select(mean = int, chr) %>% # rename `int` to `mean` and use `mean(mean)` in `mutate()` to test that # `all_funs()` detects `mean()` despite the collision with a column name group_by(chr) %>% mutate(avg_int = mean(mean)) %>% + # Because this silently does a join, the rows can get unsorted + arrange(chr) %>% collect(), - "window functions not currently supported in Arrow; pulling data into R", - fixed = TRUE + tbl ) }) -test_that("Can mutate with .by argument as long as there are no aggregations", { +test_that("Can mutate with .by argument, even with some aggregations", { compare_dplyr_binding( .input %>% select(int, chr) %>% @@ -479,25 +472,25 @@ test_that("Can mutate with .by argument as long as there are no aggregations", { collect(), tbl ) - expect_warning( - tbl %>% - Table$create() %>% + compare_dplyr_binding( + .input %>% select(int, chr) %>% mutate(avg_int = mean(int), .by = chr) %>% + # Because this silently does a join, the rows can get unsorted + arrange(chr) %>% collect(), - "window functions not currently supported in Arrow; pulling data into R", - fixed = TRUE + tbl ) - expect_warning( - tbl %>% - Table$create() %>% + compare_dplyr_binding( + .input %>% select(mean = int, chr) %>% # rename `int` to `mean` and use `mean(mean)` in `mutate()` to test that # `all_funs()` detects `mean()` despite the collision with a column name mutate(avg_int = mean(mean), .by = chr) %>% + # Because this silently does a join, the rows can get unsorted + arrange(chr) %>% collect(), - "window functions not currently supported in Arrow; pulling data into R", - fixed = TRUE + tbl ) }) @@ -540,7 +533,7 @@ test_that("Can't just add a vector column with mutate()", { mutate(again = 1:10), tibble::tibble(int = tbl$int, again = 1:10) ), - "In again = 1:10, only values of size one are recycled; pulling data into R" + "Recycling values of length != 1 not supported in Arrow" ) }) @@ -682,7 +675,6 @@ test_that("mutate() and transmute() with namespaced functions", { }) test_that("Can use across() within mutate()", { - # expressions work in the right order compare_dplyr_binding( .input %>% @@ -717,17 +709,15 @@ test_that("Can use across() within mutate()", { example_data ) - # gives the right error with window functions - 
expect_warning( - arrow_table(example_data) %>% + compare_dplyr_binding( + .input %>% mutate( x = int + 2, across(c("int", "dbl"), list(mean = mean, sd = sd, round)), exp(dbl2) ) %>% collect(), - "window functions not currently supported in Arrow; pulling data into R", - fixed = TRUE + example_data ) }) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 87bb5e5fac959..95212407acf9d 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -337,20 +337,20 @@ test_that("Functions that take ... but we only accept a single arg", { ) # Now that we've demonstrated that the whole machinery works, let's test - # the agg_funcs directly - expect_error(call_binding_agg("n_distinct"), "n_distinct() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("sum"), "sum() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("prod"), "prod() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("any"), "any() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("all"), "all() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("min"), "min() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("max"), "max() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("n_distinct", 1, 2), "Multiple arguments to n_distinct()") - expect_error(call_binding_agg("sum", 1, 2), "Multiple arguments to sum") - expect_error(call_binding_agg("any", 1, 2), "Multiple arguments to any()") - expect_error(call_binding_agg("all", 1, 2), "Multiple arguments to all()") - expect_error(call_binding_agg("min", 1, 2), "Multiple arguments to min()") - expect_error(call_binding_agg("max", 1, 2), "Multiple arguments to max()") + # the agg funcs directly + expect_error(call_binding("n_distinct"), "n_distinct() with 0 arguments", fixed = TRUE) + expect_error(call_binding("sum"), "sum() with 0 arguments", fixed = TRUE) + expect_error(call_binding("prod"), "prod() with 0 arguments", fixed = TRUE) + expect_error(call_binding("any"), "any() with 0 arguments", fixed = TRUE) + expect_error(call_binding("all"), "all() with 0 arguments", fixed = TRUE) + expect_error(call_binding("min"), "min() with 0 arguments", fixed = TRUE) + expect_error(call_binding("max"), "max() with 0 arguments", fixed = TRUE) + expect_error(call_binding("n_distinct", 1, 2), "Multiple arguments to n_distinct()") + expect_error(call_binding("sum", 1, 2), "Multiple arguments to sum") + expect_error(call_binding("any", 1, 2), "Multiple arguments to any()") + expect_error(call_binding("all", 1, 2), "Multiple arguments to all()") + expect_error(call_binding("min", 1, 2), "Multiple arguments to min()") + expect_error(call_binding("max", 1, 2), "Multiple arguments to max()") }) test_that("median()", { @@ -832,28 +832,18 @@ test_that("Expressions on aggregations", { ) # Aggregates on aggregates are not supported - expect_warning( - record_batch(tbl) %>% summarise(any(any(lgl))), - paste( - "In any\\(any\\(lgl\\)\\), aggregate within aggregate expression", - "not supported in Arrow" - ) + expect_snapshot( + record_batch(tbl) %>% summarise(any(any(lgl))) ) # Check aggregates on aggregates with more complex calls expect_warning( record_batch(tbl) %>% summarise(any(any(!lgl))), - paste( - "In any\\(any\\(!lgl\\)\\), aggregate within aggregate expression", - "not supported in Arrow" - ) + "aggregate within aggregate expression not supported in Arrow" ) expect_warning( record_batch(tbl) %>% summarise(!any(any(lgl))), 
- paste( - "In \\!any\\(any\\(lgl\\)\\), aggregate within aggregate expression", - "not supported in Arrow" - ) + "aggregate within aggregate expression not supported in Arrow" ) }) @@ -965,7 +955,7 @@ test_that("Summarize with 0 arguments", { ) }) -test_that("Not (yet) supported: window functions", { +test_that("Not supported: window functions", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% @@ -974,10 +964,7 @@ test_that("Not (yet) supported: window functions", { ) %>% collect(), tbl, - warning = paste( - "In sum\\(\\(dbl - mean\\(dbl\\)\\)\\^2\\), aggregate within", - "aggregate expression not supported in Arrow; pulling data into R" - ) + warning = "aggregate within aggregate expression not supported in Arrow" ) compare_dplyr_binding( .input %>% @@ -987,10 +974,7 @@ test_that("Not (yet) supported: window functions", { ) %>% collect(), tbl, - warning = paste( - "In sum\\(dbl - mean\\(dbl\\)\\), aggregate within aggregate expression", - "not supported in Arrow; pulling data into R" - ) + warning = "aggregate within aggregate expression not supported in Arrow" ) compare_dplyr_binding( .input %>% @@ -1000,10 +984,7 @@ test_that("Not (yet) supported: window functions", { ) %>% collect(), tbl, - warning = paste( - "In sqrt\\(sum\\(\\(dbl - mean\\(dbl\\)\\)\\^2\\)/\\(n\\(\\) - 1L\\)\\), aggregate within", - "aggregate expression not supported in Arrow; pulling data into R" - ) + warning = "aggregate within aggregate expression not supported in Arrow" ) compare_dplyr_binding( @@ -1012,10 +993,7 @@ test_that("Not (yet) supported: window functions", { summarize(y - mean(y)) %>% collect(), data.frame(x = 1, y = 2), - warning = paste( - "Expression y - mean\\(y\\) is not a valid aggregation expression", - "or is not supported in Arrow; pulling data into R" - ) + warning = "Expression is not a valid aggregation expression or is not supported in Arrow" ) compare_dplyr_binding( @@ -1024,10 +1002,7 @@ test_that("Not (yet) supported: window functions", { summarize(y) %>% collect(), data.frame(x = 1, y = 2), - warning = paste( - "Expression y is not a valid aggregation expression", - "or is not supported in Arrow; pulling data into R" - ) + warning = "Expression is not a valid aggregation expression or is not supported in Arrow" ) # This one could possibly be supported--in mutate() @@ -1037,10 +1012,7 @@ test_that("Not (yet) supported: window functions", { summarize(x - y) %>% collect(), data.frame(x = 1, y = 2, z = 3), - warning = paste( - "Expression x - y is not a valid aggregation expression", - "or is not supported in Arrow; pulling data into R" - ) + warning = "Expression is not a valid aggregation expression or is not supported in Arrow" ) }) @@ -1274,13 +1246,12 @@ test_that("Can use across() within summarise()", { ) # across() doesn't work in summarise when input expressions evaluate to bare field references - expect_warning( + expect_snapshot( data.frame(x = 1, y = 2) %>% arrow_table() %>% group_by(x) %>% summarise(across(everything())) %>% - collect(), - regexp = "Expression y is not a valid aggregation expression or is not supported in Arrow; pulling data into R" + collect() ) }) diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R index 06f956504350e..8335dc95cd85c 100644 --- a/r/tests/testthat/test-scalar.R +++ b/r/tests/testthat/test-scalar.R @@ -94,10 +94,6 @@ test_that("Handling string data with embedded nuls", { ) scalar_with_nul <- scalar(raws, binary())$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without 
- # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") v <- expect_error(as.vector(scalar_with_nul), NA) expect_error( v[1], diff --git a/r/tests/testthat/test-udf.R b/r/tests/testthat/test-udf.R index 0eb75b1dde6e5..8604dc610a435 100644 --- a/r/tests/testthat/test-udf.R +++ b/r/tests/testthat/test-udf.R @@ -90,7 +90,7 @@ test_that("register_scalar_function() adds a compute function to the registry", int32(), float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit(unregister_binding("times_32")) expect_true("times_32" %in% names(asNamespace("arrow")$.cache$functions)) expect_true("times_32" %in% list_compute_functions()) @@ -124,7 +124,7 @@ test_that("arrow_scalar_function() with bad return type errors", { int32(), float64() ) - on.exit(unregister_binding("times_32_bad_return_type_array", update_cache = TRUE)) + on.exit(unregister_binding("times_32_bad_return_type_array")) expect_error( call_function("times_32_bad_return_type_array", Array$create(1L)), @@ -137,7 +137,7 @@ test_that("arrow_scalar_function() with bad return type errors", { int32(), float64() ) - on.exit(unregister_binding("times_32_bad_return_type_scalar", update_cache = TRUE)) + on.exit(unregister_binding("times_32_bad_return_type_scalar")) expect_error( call_function("times_32_bad_return_type_scalar", Array$create(1L)), @@ -155,7 +155,7 @@ test_that("register_scalar_function() can register multiple kernels", { out_type = function(in_types) in_types[[1]], auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit(unregister_binding("times_32")) expect_equal( call_function("times_32", Scalar$create(1L, int32())), @@ -238,7 +238,7 @@ test_that("user-defined functions work during multi-threaded execution", { float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit(unregister_binding("times_32")) # check a regular collect() result <- open_dataset(tf_dataset) %>% @@ -271,7 +271,7 @@ test_that("nested exec plans can contain user-defined functions", { float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit(unregister_binding("times_32")) stream_plan_with_udf <- function() { record_batch(a = 1:1000) %>% @@ -310,7 +310,7 @@ test_that("head() on exec plan containing user-defined functions", { float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit(unregister_binding("times_32")) result <- record_batch(a = 1:1000) %>% dplyr::mutate(b = times_32(a)) %>% diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 0af41888b95b7..def4d35f825be 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -386,9 +386,7 @@ distro <- function() { out$id <- tolower(out$id) # debian unstable & testing lsb_release `version` don't include numbers but we can map from pretty name if (is.null(out$version) || out$version %in% c("testing", "unstable")) { - if (grepl("bullseye", out$codename)) { - out$short_version <- "11" - } else if (grepl("bookworm", out$codename)) { + if (grepl("bookworm", out$codename)) { out$short_version <- "12" } } else if (out$id == "ubuntu") { diff --git a/r/tools/test-nixlibs.R b/r/tools/test-nixlibs.R index ed5192d806990..02e822c3420c8 100644 --- a/r/tools/test-nixlibs.R +++ b/r/tools/test-nixlibs.R @@ -23,8 +23,9 @@ TESTING <- TRUE # The functions use `on_macos` 
from the env they were sourced in, so we need to
 # explicitly set it in that environment.
+# We capture.output for a cleaner testthat output.
 nixlibs_env <- environment()
-source("nixlibs.R", local = nixlibs_env)
+capture.output(source("nixlibs.R", local = nixlibs_env))

 test_that("identify_binary() based on LIBARROW_BINARY", {
   expect_null(identify_binary("FALSE"))
diff --git a/r/tools/ubsan.supp b/r/tools/ubsan.supp
index ff88cf984136b..34854e79bcbf9 100644
--- a/r/tools/ubsan.supp
+++ b/r/tools/ubsan.supp
@@ -16,3 +16,4 @@
 # under the License.

 vptr:include/c++/8/bits/shared_ptr_base.h
+function:cleancall.c
\ No newline at end of file
diff --git a/r/vignettes/data_wrangling.Rmd b/r/vignettes/data_wrangling.Rmd
index 305a91c156eb1..1d074ef0cfedb 100644
--- a/r/vignettes/data_wrangling.Rmd
+++ b/r/vignettes/data_wrangling.Rmd
@@ -165,33 +165,7 @@ sw2 %>%
   transmute(name, height, mass, res = residuals(lm(mass ~ height)))
 ```

-Because window functions are not supported, computing an aggregation like `mean()` on a grouped table or within a rowwise operation like `filter()` is not supported:
-
-```{r}
-sw %>%
-  select(1:4) %>%
-  filter(!is.na(hair_color)) %>%
-  group_by(hair_color) %>%
-  filter(height < mean(height, na.rm = TRUE))
-```
-
-This operation is sometimes referred to as a windowed aggregate and can be accomplished in Arrow by computing the aggregation separately, for example within a join operation:
-
-```{r}
-sw %>%
-  select(1:4) %>%
-  filter(!is.na(hair_color)) %>%
-  left_join(
-    sw %>%
-      group_by(hair_color) %>%
-      summarize(mean_height = mean(height, na.rm = TRUE))
-  ) %>%
-  filter(height < mean_height) %>%
-  select(!mean_height) %>%
-  collect()
-```
-
-Alternatively, [DuckDB](https:\www.duckdb.org) supports Arrow natively, so you can pass the `Table` object to DuckDB without paying a performance penalty using the helper function `to_duckdb()` and pass the object back to Arrow with `to_arrow()`:
+For some operations, you can use [DuckDB](https://www.duckdb.org). It supports Arrow natively, so you can pass the `Dataset` or query object to DuckDB without paying a performance penalty using the helper function `to_duckdb()` and pass the object back to Arrow with `to_arrow()`:

 ```{r}
 sw %>%
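The rest of that vignette chunk falls outside the diff context. A hedged sketch of the round trip the new paragraph describes, reusing the vignette's `sw` table (an Arrow Table of `dplyr::starwars`) and assuming the duckdb and dbplyr packages are installed:

```r
library(arrow)
library(dplyr)

# Hand the query off to DuckDB for the windowed aggregate that Arrow does
# not run itself, then hand the result back without copying the data.
sw %>%
  select(1:4) %>%
  filter(!is.na(hair_color)) %>%
  to_duckdb() %>%                                  # Arrow -> DuckDB view
  group_by(hair_color) %>%
  filter(height < mean(height, na.rm = TRUE)) %>%  # window function runs in DuckDB
  to_arrow() %>%                                   # DuckDB -> Arrow again
  collect()
```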
diff --git a/r/vignettes/developers/docker.Rmd b/r/vignettes/developers/docker.Rmd
index de2795cfa6bb5..13f60904c9484 100644
--- a/r/vignettes/developers/docker.Rmd
+++ b/r/vignettes/developers/docker.Rmd
@@ -5,23 +5,23 @@ description: >
 output: rmarkdown::html_vignette
 ---

-Arrow is compatible with a huge number of combinations of OSs, OS versions, 
-compilers, R versions, and other variables. Sometimes these combinations of 
-variables means that behaviours are found in some environments which cannot be 
-replicated in others. In addition, there are different ways of building Arrow, 
-for example, using environment variables to specify the building of optional 
+Arrow is compatible with a huge number of combinations of OSs, OS versions,
+compilers, R versions, and other variables. Sometimes these combinations of
+variables mean that behaviours are found in some environments which cannot be
+replicated in others. In addition, there are different ways of building Arrow,
+for example, using environment variables to specify the building of optional
 components.

-What all this means is that you may need to use a different setup to the one in 
-which you are working, when diagnosing a bug or testing out a new feature which 
-you have reason to believe may be affected by these variables. One way to do 
+What all this means is that you may need to use a different setup to the one in
+which you are working, when diagnosing a bug or testing out a new feature which
+you have reason to believe may be affected by these variables. One way to do
 this is to spin up a Docker image containing the desired setup. This article
 provides a basic guide to using Docker in your R development.

 ## How do I run a Docker container?

-There are a number of images which have been created for the convenience of 
+There are a number of images which have been created for the convenience of
 Arrow devs and you can find them on
 [the DockerHub repo](https://hub.docker.com/r/apache/arrow-dev/tags).
 The code below shows an example command you could use to run a Docker container.
@@ -29,7 +29,7 @@ The code below shows an example command you could use to run a Docker container.
 This should be run in the root directory of a checkout of the arrow repo.

 ```shell
-docker run -it -e ARROW_DEPENDENCY_SOURCE=AUTO -v $(pwd):/arrow apache/arrow-dev:r-rhub-ubuntu-gcc-release-latest
+docker run -it -e ARROW_DEPENDENCY_SOURCE=AUTO -v $(pwd):/arrow apache/arrow-dev:r-rhub-ubuntu-release-latest
 ```

 Components:
@@ -39,13 +39,13 @@ Components:
 * `-e ARROW_DEPENDENCY_SOURCE=AUTO` - set the environment variable `ARROW_DEPENDENCY_SOURCE` to the value `AUTO`
 * `-v $(pwd):/arrow` - mount the current directory at `/arrow` in the container
 * `apache/arrow-dev` - the DockerHub repo to get this container from
-* `r-rhub-ubuntu-gcc-release-latest` - the image tag
+* `r-rhub-ubuntu-release-latest` - the image tag

-Once you run this command, if you don't have a copy of that particular image 
+Once you run this command, if you don't have a copy of that particular image
 saved locally, it will first be downloaded before a container is spun up.

-In the example above, mounting the directory in which the Arrow repo was stored 
-on the local machine, meant that that code could be built and tested on the 
+In the example above, mounting the directory in which the Arrow repo was stored
+on the local machine, meant that the code could be built and tested on the
 container.

 ## How do I exit this image?
@@ -73,29 +73,29 @@ sudo docker ps -a

 ## Running existing workflows from docker-compose.yml

 There are a number of workflows outlined in the file `docker-compose.yml` in the
-arrow repo root directory. For example, you can use the workflow called `r` to 
-test building and installing the R package. This is advantageous as you can use 
-existing utility scripts and install it onto a container which already has R on 
+arrow repo root directory. For example, you can use the workflow called `r` to
+test building and installing the R package. This is advantageous as you can use
+existing utility scripts and install it onto a container which already has R on
 it.
-These workflows are also parameterized, which means you can specify different +These workflows are also parameterized, which means you can specify different options (or just use the defaults, which can be found in `.env`) ### Example - The manual way -If you wanted to run [RHub's latest `ubuntu-gcc-release` image](https://hub.docker.com/r/rhub/ubuntu-gcc-release), you could +If you wanted to run [RHub's latest `ubuntu-release` image](https://hub.docker.com/r/rhub/ubuntu-release), you could run: -```shell -R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose build r -R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose run r +```shell +R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose build r +R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose run r ``` ### Example - Using Archery Alternatively, you may prefer to use the [Archery tool to run docker images](https://arrow.apache.org/docs/developers/docker.html). -This has the advantage of making it simpler to build some of the existing Arrow -CI jobs which have hierarchical dependencies, and so for example, you could +This has the advantage of making it simpler to build some of the existing Arrow +CI jobs which have hierarchical dependencies, and so for example, you could build the R package on a container which already has the C++ code pre-built. This is the same tool which our CI uses - via a tool called [Crossbow](https://arrow.apache.org/docs/developers/crossbow.html). @@ -103,5 +103,5 @@ This is the same tool which our CI uses - via a tool called [Crossbow](https://a If you want to run the `r` workflow discussed above, you could run: ```shell -R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest archery docker run r +R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest archery docker run r ``` diff --git a/r/vignettes/developers/matchsubstringoptions.png b/r/vignettes/developers/matchsubstringoptions.png deleted file mode 100644 index 2dff3c5858e09..0000000000000 Binary files a/r/vignettes/developers/matchsubstringoptions.png and /dev/null differ diff --git a/r/vignettes/developers/starts_with_docs.png b/r/vignettes/developers/starts_with_docs.png deleted file mode 100644 index a55e888128fcd..0000000000000 Binary files a/r/vignettes/developers/starts_with_docs.png and /dev/null differ diff --git a/r/vignettes/developers/startswithdocs.png b/r/vignettes/developers/startswithdocs.png deleted file mode 100644 index 6e1f3df1b9b82..0000000000000 Binary files a/r/vignettes/developers/startswithdocs.png and /dev/null differ diff --git a/r/vignettes/developers/writing_bindings.Rmd b/r/vignettes/developers/writing_bindings.Rmd deleted file mode 100644 index 443211b3c2b5e..0000000000000 --- a/r/vignettes/developers/writing_bindings.Rmd +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: "Writing dplyr bindings" -description: > - Learn how to write bindings that allow arrow to mirror the behavior - of native R functions within dplyr pipelines -output: rmarkdown::html_vignette ---- - -```{r, include=FALSE} -library(arrow, warn.conflicts = FALSE) -library(dplyr, warn.conflicts = FALSE) -``` - -When writing bindings between C++ compute functions and R functions, the aim is -to expose the C++ functionality via the same interface as existing R functions. The syntax and -functionality should match that of the existing R functions -(though there are some exceptions) so that users are able to use existing tidyverse -or base R syntax, whilst taking advantage of the speed and functionality of the -underlying arrow package. 
- -One of main ways in which users interact with arrow is via -[dplyr](https://dplyr.tidyverse.org/) syntax called on Arrow objects. For -example, when a user calls `dplyr::mutate()` on an Arrow Tabular, -Dataset, or arrow data query object, the Arrow implementation of `mutate()` is -used and under the hood, translates the dplyr code into Arrow C++ code. - -When using `dplyr::mutate()` or `dplyr::filter()`, you may want to use functions -from other packages. The example below uses `stringr::str_detect()`. - -```{r} -library(dplyr) -library(stringr) -starwars %>% - filter(str_detect(name, "Darth")) -``` -This functionality has also been implemented in Arrow, e.g.: - -```{r} -library(arrow) -arrow_table(starwars) %>% - filter(str_detect(name, "Darth")) %>% - collect() -``` - -This is possible as a **binding** has been created between the call to the -stringr function `str_detect()` and the Arrow C++ code, here as a direct mapping -to `match_substring_regex`. You can see this for yourself by inspecting the -arrow data query object without retrieving the results via `collect()`. - - -```{r} -arrow_table(starwars) %>% - filter(str_detect(name, "Darth")) -``` - -In the following sections, we'll walk through how to create a binding between an -R function and an Arrow C++ function. - -# Walkthrough - -Imagine you are writing the bindings for the C++ function -[`starts_with()`](https://arrow.apache.org/docs/cpp/compute.html#containment-tests) -and want to bind it to the (base) R function `startsWith()`. - -First, take a look at the docs for both of those functions. - -## Examining the R function - -Here are the docs for R's `startsWith()` (also available at https://stat.ethz.ch/R-manual/R-devel/library/base/html/startsWith.html) - -```{r, echo=FALSE, out.width="50%"} -knitr::include_graphics("./startswithdocs.png") -``` - -It takes 2 parameters; `x` - the input, and `prefix` - the characters to check -if `x` starts with. - -## Examining the C++ function - -Now, go to -[the compute function documentation](https://arrow.apache.org/docs/cpp/compute.html#containment-tests) -and look for the Arrow C++ library's `starts_with()` function: - -```{r, echo=FALSE, out.width="100%"} -knitr::include_graphics("./starts_with_docs.png") -``` - -The docs show that `starts_with()` is a unary function, which means that it takes a -single data input. The data input must be a string-like class, and the returned -value is boolean, both of which match up to R's `startsWith()`. - -There is an options class associated with `starts_with()` - called [`MatchSubstringOptions`](https://arrow.apache.org/docs/cpp/api/compute.html#_CPPv4N5arrow7compute21MatchSubstringOptionsE) -- so let's take a look at that. - -```{r, echo=FALSE, out.width="100%"} -knitr::include_graphics("./matchsubstringoptions.png") -``` - -Options classes allow the user to control the behaviour of the function. In -this case, there are two possible options which can be supplied - `pattern` and -`ignore_case`, which are described in the docs shown above. - -## Comparing the R and C++ functions - -What conclusions can be drawn from what you've seen so far? - -Base R's `startsWith()` and Arrow's `starts_with()` operate on equivalent data -types, return equivalent data types, and as there are no options implemented in -R that Arrow doesn't have, this should be fairly simple to map without a great -deal of extra work. - -As `starts_with()` has an options class associated with it, we'll need to make -sure that it's linked up with this in the R code. 
- -In case you're wondering about the difference between arguments in R and options -in Arrow, in R, arguments to functions can include the actual data to be -analysed as well as options governing how the function works, whereas in the -C++ compute functions, the arguments are the data to be analysed and the -options are for specifying how exactly the function works. - -So let's get started. - -## Step 1 - add unit tests - -We recommend a test-driven-development approach - write failing tests first, -then check that they fail, and then write the code needed to make them pass. -Thinking up-front about the behavior which needs testing can make it easier to -reason about the code which needs writing later. - -Look up the R function that you want to bind the compute kernel to, and write a -set of unit tests that use a dplyr pipeline and `compare_dplyr_binding()` (and -perhaps even `compare_dplyr_error()` if necessary. These functions compare the -output of the original function with the dplyr bindings and make sure they match. -We recommend looking at the [documentation next to the source code for these -functions](https://github.com/apache/arrow/blob/main/r/tests/testthat/helper-expectation.R) -to get a better understanding of how they work. - -You should make sure you're testing all parameters of the R function in your -tests. - -Below is a possible example test for `startsWith()`. - -```{r, eval = FALSE} -test_that("startsWith behaves identically in dplyr and Arrow", { - df <- tibble(x = c("Foo", "bar", "baz", "qux")) - compare_dplyr_binding( - .input %>% - filter(startsWith(x, "b")) %>% - collect(), - df - ) - -}) -``` - -## Step 2 - Hook up the compute function with options class if necessary - -If the C++ compute function can have options specified, make sure that the -function is linked with its options class in `make_compute_options()` in the -file `arrow/r/src/compute.cpp`. You can find out if a compute function requires -options by looking in the docs here: https://arrow.apache.org/docs/cpp/compute.html - -In the case of `starts_with()`, it looks something like this: - -```cpp - if (func_name == "starts_with") { - using Options = arrow::compute::MatchSubstringOptions; - bool ignore_case = false; - if (!Rf_isNull(options["ignore_case"])) { - ignore_case = cpp11::as_cpp(options["ignore_case"]); - } - return std::make_shared(cpp11::as_cpp(options["pattern"]), - ignore_case); - } -``` - -You can usually copy and paste from a similar existing example. In this case, -as the option `ignore_case` doesn't map to any parameters of `startsWith()`, we -give it a default value of `false` but if it's been set, use the set value -instead. As the `pattern` argument maps directly to `prefix` in `startsWith()` -we can pass it straight through. - -## Step 3 - Map the R function to the C++ kernel - -The next task is writing the code which binds the R function to the C++ kernel. - -### Step 3a - See if direct mapping is appropriate -Compare the C++ function and R function. If they are simple functions with no -options, it might be possible to directly map between the C++ and R in -`unary_function_map`, in the case of compute functions that operate on single -columns of data, or `binary_function_map` for those which operate on 2 columns -of data. - -As `startsWith()` requires options, direct mapping is not appropriate. 
- -### Step 3b - If direct mapping not possible, try a modified implementation -If the function cannot be mapped directly, some extra work may be needed to -ensure that calling the arrow version of the function results in the same result -as calling the R version of the function. In this case, the function will need -adding to the `nse_funcs` function registry. Here is how this might look for -`startsWith()`: - -```{r, eval = FALSE} -register_binding("base::startsWith", function(x, prefix) { - Expression$create( - "starts_with", - x, - options = list(pattern = prefix) - ) -}) -``` - -In the source files, all the `register_binding()` calls are wrapped in functions -that are called on package load. These are separated into files based on -subject matter (e.g., `R/dplyr-funcs-math.R`, `R/dplyr-funcs-string.R`): find the -closest analog to the function whose binding is being defined and define the -new binding in a similar location. For example, the binding for `startsWith()` -is registered in `dplyr-funcs-string.R` next to the binding for `endsWith()`. - -Note: we use the namespace-qualified name (i.e. `"base::startsWith"`) for a -binding. This will register the same binding both as `startsWith()` and as -`base::startsWith()`, which will allow us to use the `pkg::` prefix in a call. - -```{r} -arrow_table(starwars) %>% - filter(stringr::str_detect(name, "Darth")) -``` - -Hint: you can use `call_function()` to call a compute function directly from R. -This might be useful if you want to experiment with a compute function while -you're writing bindings for it, e.g. - -```{r} -call_function( - "starts_with", - Array$create(c("Apache", "Arrow", "R", "package")), - options = list(pattern = "A") -) -``` - -## Step 4 - Run (and potentially add to) your tests. - -In the process of implementing the function, you will need at least one test -to make sure that your binding works and that future changes to the Arrow R -package don't break it! Bindings are tested in files that correspond to -the file in which they were defined (e.g., `startsWith()` is tested in -`tests/testthat/test-dplyr-funcs-string.R`) next to the tests for `endsWith()`. - -You may end up implementing more tests, for example if you discover unusual -edge cases. This is fine - add them to the ones you wrote originally, -and run them all. If they pass, you're done and you can submit a PR. -If you've modified the C++ code in the -R package (for example, when hooking up a binding to its options class), you -should make sure to run `arrow/r/lint.sh` to lint the code. diff --git a/r/vignettes/install.Rmd b/r/vignettes/install.Rmd index df43a9de36fc2..c7b8251ccc99b 100644 --- a/r/vignettes/install.Rmd +++ b/r/vignettes/install.Rmd @@ -28,35 +28,6 @@ For `gcc`, this generally means version 7 or newer. Most contemporary Linux distributions have a new enough compiler; however, CentOS 7 is a notable exception, as it ships with gcc 4.8. -If you are on CentOS 7, to build arrow you will need to install a newer `devtoolset`, and you'll need to update R's Makevars to define the `CXX17` variables. This script installs `devtoolset-8` and configures R to be able to use C++17: - -``` -#!/usr/bin/env bash - -yum install -y centos-release-scl -yum install -y devtoolset-8 -# Optional: also install cloud storage dependencies, as described below -yum install -y libcurl-devel openssl-devel - -source /opt/rh/devtoolset-8/enable - -if [ ! 
`R CMD config CXX17` ]; then
-  mkdir -p ~/.R
-  echo "CC = $(which gcc) -fPIC" >> ~/.R/Makevars
-  echo "CXX17 = $(which g++) -fPIC" >> ~/.R/Makevars
-  echo "CXX17STD = -std=c++17" >> ~/.R/Makevars
-  echo "CXX17FLAGS = ${CXX11FLAGS}" >> ~/.R/Makevars
-fi
-```
-
-Note that the C++17 compiler is only required at *build* time. You don't need
-to enable the devtoolset every time you load the package. What's more, if you
-install a binary package from RStudio Package Manager (see method 1a below), you
-do not need to set up any of this. Likewise, if you `R CMD INSTALL --build`
-arrow on a CentOS machine with the newer compilers, you can take the binary
-package it produces and install it on any other CentOS machine without those
-compilers.
-
 ### Libraries

 Optional support for reading from cloud storage--AWS S3 and
@@ -69,7 +40,7 @@ The prebuilt binaries come with S3 and GCS support enabled, so you will need to

 ## Install release version (easy way)

-On macOS and Windows, when you run `install.packages("arrow")` and install arrow from CRAN, you get an R binary package that contains a precompiled version of libarrow. Installing binaries is much easier that installing from source, but CRAN does not host binaries for Linux. This means that the default behaviour when you run `install.packages()` on Linux is to retrieve the source version of the R package and compile both the R package _and_ libarrow from source. We'll talk about this scenario in the next section (the "less easy" way), but first we'll suggest two faster alternatives that are usually much easier. 
+On macOS and Windows, when you run `install.packages("arrow")` and install arrow from CRAN, you get an R binary package that contains a precompiled version of libarrow. Installing binaries is much easier than installing from source, but CRAN does not host binaries for Linux. This means that the default behaviour when you run `install.packages()` on Linux is to retrieve the source version of the R package and compile both the R package _and_ libarrow from source. We'll talk about this scenario in the next section (the "less easy" way), but first we'll suggest two faster alternatives that are usually much easier.

 ### Binary R package with libarrow binary via RSPM/conda
@@ -154,15 +125,15 @@ knitr::include_graphics("./r_source_libarrow_source.png")
 ```

 The "less easy" way to install arrow is to install both the R package and the
 underlying Arrow C++ library (libarrow) from source. This method is somewhat more
-difficult because compiling and installing R packages with C++ dependencies 
-generally requires installing system packages, which you may not have privileges 
-to do, and/or building the C++ dependencies separately, which introduces all sorts 
+difficult because compiling and installing R packages with C++ dependencies
+generally requires installing system packages, which you may not have privileges
+to do, and/or building the C++ dependencies separately, which introduces all sorts
 of additional ways for things to go wrong.

-Installing from the full source build of arrow, compiling both C++ and R 
-bindings, will handle most of the dependency management for you, but it is 
-much slower than using binaries. 
+Installing from the full source build of arrow, compiling both C++ and R
+bindings, will handle most of the dependency management for you, but it is
+much slower than using binaries. However, if using binaries isn't an option
+for you, or you wish to customize your Linux installation, the instructions in
 this section explain how to do that.

 ### Basic configuration
@@ -369,10 +340,10 @@ satisfy C++ dependencies.

 ## Offline installation

-The `install-arrow.R` file mentioned in the previous section includes a 
-function called `create_package_with_all_dependencies()`. Normally, when 
-installing on a computer with internet access, the build process will 
-download third-party dependencies as needed. This function provides a 
+The `install-arrow.R` file mentioned in the previous section includes a
+function called `create_package_with_all_dependencies()`. Normally, when
+installing on a computer with internet access, the build process will
+download third-party dependencies as needed. This function provides a
 way to download them in advance, which can be useful when installing Arrow
 on a computer without internet access. The process is as follows:

@@ -380,11 +351,11 @@ on a computer without internet access. The process is as follows:
 **Step 1.** Using a computer with internet access, download dependencies:

 * Install the arrow package **or** source the script directly using the following command:
-  
+
   ```r
   source("https://raw.githubusercontent.com/apache/arrow/main/r/R/install-arrow.R")
   ```
-  
+
 * Use the `create_package_with_all_dependencies()` function to create the installation bundle:

   ```r
@@ -399,27 +370,27 @@ on a computer without internet access. The process is as follows:

   ```r
   install.packages(
-    "my_arrow_pkg.tar.gz", 
+    "my_arrow_pkg.tar.gz",
     dependencies = c("Depends", "Imports", "LinkingTo")
   )
   ```

   This installation will build from source, so `cmake` must be available
-  
+
 * Run `arrow_info()` to check installed capabilities (see the sketch below)
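A hedged illustration of that final check (the exact capability names vary with the Arrow version and build options):

```r
# Each capability (e.g. s3, gcs, json) is TRUE only if the corresponding
# optional dependency was available when libarrow was built; on an offline
# build made without the helper above they will all be FALSE.
arrow::arrow_info()$capabilities()
```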
 Notes:

-- arrow _can_ be installed on a computer without internet access 
-without using this function, but many useful features will be disabled, 
+- arrow _can_ be installed on a computer without internet access
+without using this function, but many useful features will be disabled,
 as they depend on third-party components. More precisely,
 `arrow::arrow_info()$capabilities()` will be `FALSE` for every capability.

 - If you are using binary packages you shouldn't need to use this function. You
 can download the appropriate binary from your package repository, transfer
-that to the offline computer, and install that. 
+that to the offline computer, and install that.

 - If you're using RStudio Package Manager on Linux (RSPM), and you want to make
 a source bundle with this function, make sure to set the first repository
@@ -517,17 +488,13 @@ The install script should work everywhere, so if libarrow fails to compile,
 please [report an issue](https://issues.apache.org/jira/projects/ARROW/issues)
 so that we can improve the script.

-### Known installation issues
-
-* On CentOS, building the package requires a more modern `devtoolset` than the default system compilers. See "System dependencies" above.
-
 ## Contributing

-We are constantly working to make the installation process as painless as 
+We are constantly working to make the installation process as painless as
 possible. If you find ways to improve the process, please [report an issue](https://github.com/apache/arrow/issues) so that we can document it.
 Similarly, if you find that your Linux distribution
-or version is not supported, we would welcome the contribution of Docker 
-images (hosted on Docker Hub) that we can use in our continuous integration 
+or version is not supported, we would welcome the contribution of Docker
+images (hosted on Docker Hub) that we can use in our continuous integration
 and hopefully improve our coverage. If you do contribute a Docker image, it
 should be as minimal as possible, containing only R and the dependencies it
 requires. For reference, see the images that
@@ -537,19 +504,16 @@ You can test the arrow R package installation using the `docker-compose` setup
 included in the `apache/arrow` git repository. For example,

 ```
-R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose build r
-R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose run r
+R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose build r
+R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose run r
 ```

 installs the arrow R package, including libarrow, on the
-[rhub/ubuntu-gcc-release](https://hub.docker.com/r/rhub/ubuntu-gcc-release)
+[rhub/ubuntu-release](https://hub.docker.com/r/rhub/ubuntu-release)
 image.

 ## Further reading

 - To learn about installing development versions, see the article on [installing nightly builds](./install_nightly.html).
-- If you're contributing to the Arrow project, see the [Arrow R developers guide](./developing.html) for resources to help you on set up your development environment. 
+- If you're contributing to the Arrow project, see the [Arrow R developers guide](./developing.html) for resources to help you set up your development environment.
 - Arrow developers may also wish to read a more detailed discussion of the code run during the installation process, described in the [install details article](./developers/install_details.html).
- - - diff --git a/ruby/red-arrow/lib/arrow/field-containable.rb b/ruby/red-arrow/lib/arrow/field-containable.rb index e4dbf4ec26cae..103e901f5d2de 100644 --- a/ruby/red-arrow/lib/arrow/field-containable.rb +++ b/ruby/red-arrow/lib/arrow/field-containable.rb @@ -29,7 +29,7 @@ def find_field(name_or_index) return nil if index < 0 or index >= n_fields get_field(index) else - message = "field name or index must be String, Symbol or Integer" + message = +"field name or index must be String, Symbol or Integer" message << ": <#{name_or_index.inspect}>" raise ArgumentError, message end diff --git a/ruby/red-arrow/lib/arrow/table-formatter.rb b/ruby/red-arrow/lib/arrow/table-formatter.rb index d039679f9a03a..b93faf09cbd02 100644 --- a/ruby/red-arrow/lib/arrow/table-formatter.rb +++ b/ruby/red-arrow/lib/arrow/table-formatter.rb @@ -24,7 +24,8 @@ class ColumnFormatter attr_reader :head_values attr_reader :tail_values attr_reader :sample_values - def initialize(column, head_values, tail_values) + def initialize(table_formatter, column, head_values, tail_values) + @table_formatter = table_formatter @column = column @head_values = head_values @tail_values = tail_values @@ -36,6 +37,15 @@ def data_type @data_type ||= @column.data_type end + def formatted_data_type_name + @formatted_data_type_name ||= "(#{data_type.name})" + end + + def aligned_data_type_name + @aligned_data_type_name ||= + "%*s" % [aligned_name.size, formatted_data_type_name] + end + def name @name ||= @column.name end @@ -63,7 +73,7 @@ def format_value(value, width=0) formatted_value = format_value(value[field_name], field_value_width) "#{formatted_name}: #{formatted_value}" end - formatted = "{" + formatted = +"{" formatted << formatted_values.join(", ") formatted << "}" "%-*s" % [width, formatted] @@ -90,9 +100,16 @@ def compute_field_value_width(field, sample_values) end def format_aligned_name(name, data_type, sample_values) + if @table_formatter.show_column_type? + min_width = formatted_data_type_name.size + else + min_width = 0 + end case data_type when TimestampDataType - "%*s" % [::Time.now.iso8601.size, name] + width = ::Time.now.iso8601.size + width = min_width if width < min_width + "%*s" % [width, name] when IntegerDataType have_null = false have_negative = false @@ -118,9 +135,12 @@ def format_aligned_name(name, data_type, sample_values) end width += 1 if have_negative # Need "-" width = [width, FORMATTED_NULL.size].max if have_null + width = min_width if width < min_width "%*s" % [width, name] when FloatDataType, DoubleDataType - "%*s" % [FLOAT_N_DIGITS, name] + width = FLOAT_N_DIGITS + width = min_width if width < min_width + "%*s" % [width, name] when StructDataType field_widths = data_type.fields.collect do |field| field_value_width = compute_field_value_width(field, sample_values) @@ -130,9 +150,11 @@ def format_aligned_name(name, data_type, sample_values) if field_widths.size > 0 width += (", ".size * (field_widths.size - 1)) end + width = min_width if width < min_width "%*s" % [width, name] else - name + width = min_width + "%*s" % [width, name] end end end @@ -143,7 +165,7 @@ def initialize(table, options={}) end def format - text = "" + text = +"" n_rows = @table.n_rows border = @options[:border] || 10 @@ -159,7 +181,7 @@ def format else tail_values = [] end - ColumnFormatter.new(column, head_values, tail_values) + ColumnFormatter.new(self, column, head_values, tail_values) end format_header(text, column_formatters) @@ -186,5 +208,9 @@ def format text end + + def show_column_type? 
+ @options.fetch(:show_column_type, true) + end end end diff --git a/ruby/red-arrow/lib/arrow/table-list-formatter.rb b/ruby/red-arrow/lib/arrow/table-list-formatter.rb index 4fe2934160a69..3e4d410ffbee8 100644 --- a/ruby/red-arrow/lib/arrow/table-list-formatter.rb +++ b/ruby/red-arrow/lib/arrow/table-list-formatter.rb @@ -27,9 +27,9 @@ def format_rows(text, column_formatters, rows, n_digits, start_offset) text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n") row.each_with_index do |column_value, nth_column| column_formatter = column_formatters[nth_column] - formatted_name = column_formatter.name - formatted_value = column_formatter.format_value(column_value) - text << "#{formatted_name}: #{formatted_value}\n" + text << column_formatter.name + text << "(#{column_formatter.data_type.name})" if show_column_type? + text << ": #{column_formatter.format_value(column_value)}\n" end end end diff --git a/ruby/red-arrow/lib/arrow/table-table-formatter.rb b/ruby/red-arrow/lib/arrow/table-table-formatter.rb index 36121e1b6f0e4..acf4aca8bb6d1 100644 --- a/ruby/red-arrow/lib/arrow/table-table-formatter.rb +++ b/ruby/red-arrow/lib/arrow/table-table-formatter.rb @@ -26,6 +26,13 @@ def format_header(text, column_formatters) text << "\t" text << column_formatter.aligned_name end + if show_column_type? + text << "\n" + column_formatters.each do |column_formatter| + text << "\t" + text << column_formatter.aligned_data_type_name + end + end text << "\n" end diff --git a/ruby/red-arrow/test/test-csv-loader.rb b/ruby/red-arrow/test/test-csv-loader.rb index 0b21f6f9b71f9..1e0445db06ef9 100644 --- a/ruby/red-arrow/test/test-csv-loader.rb +++ b/ruby/red-arrow/test/test-csv-loader.rb @@ -27,80 +27,88 @@ def load_csv(input) test("String: data: with header") do data = fixture_path("with-header-float.csv").read assert_equal(<<-TABLE, load_csv(data).to_s) - name score -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + name score + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("String: data: without header") do data = fixture_path("without-header-float.csv").read assert_equal(<<-TABLE, load_csv(data).to_s) - 0 1 -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + 0 1 + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("String: path: with header") do path = fixture_path("with-header-float.csv").to_s assert_equal(<<-TABLE, load_csv(path).to_s) - name score -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + name score + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("String: path: without header") do path = fixture_path("without-header-float.csv").to_s assert_equal(<<-TABLE, load_csv(path).to_s) - 0 1 -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + 0 1 + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("Pathname: with header") do path = fixture_path("with-header-float.csv") assert_equal(<<-TABLE, load_csv(path).to_s) - name score -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + name score + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("Pathname: without header") do path = fixture_path("without-header-float.csv") assert_equal(<<-TABLE, load_csv(path).to_s) - 0 1 -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + 0 1 + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("null: with double quote") do path = 
fixture_path("null-with-double-quote.csv").to_s assert_equal(<<-TABLE, load_csv(path).to_s) - name score -0 alice 10 -1 bob (null) -2 chris -1 + name score + (utf8) (int8) +0 alice 10 +1 bob (null) +2 chris -1 TABLE end test("null: without double quote") do path = fixture_path("null-without-double-quote.csv").to_s assert_equal(<<-TABLE, load_csv(path).to_s) - name score -0 alice 10 -1 bob (null) -2 chris -1 + name score + (utf8) (int8) +0 alice 10 +1 bob (null) +2 chris -1 TABLE end diff --git a/ruby/red-arrow/test/test-group.rb b/ruby/red-arrow/test/test-group.rb index 68e927df69bc8..f4831289eda48 100644 --- a/ruby/red-arrow/test/test-group.rb +++ b/ruby/red-arrow/test/test-group.rb @@ -43,6 +43,7 @@ def setup table = Arrow::Table.new(raw_table) assert_equal(<<-TABLE, table.group(:time).count.to_s) time count(int) + (timestamp) (int64) 0 #{time_values[0].iso8601} 1 1 #{time_values[1].iso8601} 1 TABLE @@ -53,6 +54,7 @@ def setup test("single") do assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s) group_key1 count(group_key2) count(int) count(uint) count(float) count(string) + (uint8) (int64) (int64) (int64) (int64) (int64) 0 1 2 2 1 1 2 1 2 1 0 1 1 1 2 3 3 3 3 3 2 @@ -62,6 +64,7 @@ def setup test("multiple") do assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s) group_key1 group_key2 count(int) count(uint) count(float) count(string) + (uint8) (uint8) (int64) (int64) (int64) (int64) 0 1 1 2 1 1 2 1 2 1 0 1 1 1 2 3 1 1 1 1 0 @@ -73,6 +76,7 @@ def setup group = @table.group(:group_key1, :group_key2) assert_equal(<<-TABLE, group.count(:int, :uint).to_s) group_key1 group_key2 count(int) count(uint) + (uint8) (uint8) (int64) (int64) 0 1 1 2 1 1 2 1 0 1 2 3 1 1 1 @@ -85,6 +89,7 @@ def setup test("single") do assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s) group_key1 sum(group_key2) sum(int) sum(uint) sum(float) + (uint8) (uint64) (int64) (uint64) (double) 0 1 2 -3 1 2.200000 1 2 1 (null) 3 3.300000 2 3 5 -15 15 16.500000 @@ -94,6 +99,7 @@ def setup test("multiple") do assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s) group_key1 group_key2 sum(int) sum(uint) sum(float) + (uint8) (uint8) (int64) (uint64) (double) 0 1 1 -3 1 2.200000 1 2 1 (null) 3 3.300000 2 3 1 -4 4 4.400000 @@ -106,6 +112,7 @@ def setup test("single") do assert_equal(<<-TABLE, @table.group(:group_key1).mean.to_s) group_key1 mean(group_key2) mean(int) mean(uint) mean(float) + (uint8) (double) (double) (double) (double) 0 1 1.000000 -1.500000 1.000000 2.200000 1 2 1.000000 (null) 3.000000 3.300000 2 3 1.666667 -5.000000 5.000000 5.500000 @@ -115,6 +122,7 @@ def setup test("multiple") do assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).mean.to_s) group_key1 group_key2 mean(int) mean(uint) mean(float) + (uint8) (uint8) (double) (double) (double) 0 1 1 -1.500000 1.000000 2.200000 1 2 1 (null) 3.000000 3.300000 2 3 1 -4.000000 4.000000 4.400000 @@ -127,6 +135,7 @@ def setup test("single") do assert_equal(<<-TABLE, @table.group(:group_key1).min.to_s) group_key1 min(group_key2) min(int) min(uint) min(float) + (uint8) (uint8) (int32) (uint32) (float) 0 1 1 -2 1 2.200000 1 2 1 (null) 3 3.300000 2 3 1 -6 4 4.400000 @@ -136,6 +145,7 @@ def setup test("multiple") do assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).min.to_s) group_key1 group_key2 min(int) min(uint) min(float) + (uint8) (uint8) (int32) (uint32) (float) 0 1 1 -2 1 2.200000 1 2 1 (null) 3 3.300000 2 3 1 -4 4 4.400000 @@ -148,6 +158,7 @@ def setup test("single") do assert_equal(<<-TABLE, 
@table.group(:group_key1).max.to_s) group_key1 max(group_key2) max(int) max(uint) max(float) + (uint8) (uint8) (int32) (uint32) (float) 0 1 1 -1 1 2.200000 1 2 1 (null) 3 3.300000 2 3 2 -4 6 6.600000 @@ -157,6 +168,7 @@ def setup test("multiple") do assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).max.to_s) group_key1 group_key2 max(int) max(uint) max(float) + (uint8) (uint8) (int32) (uint32) (float) 0 1 1 -1 1 2.200000 1 2 1 (null) 3 3.300000 2 3 1 -4 4 4.400000 @@ -170,6 +182,7 @@ def setup group = @table.group(:group_key1, :group_key2) assert_equal(<<-TABLE, group.aggregate("count(int)", "sum(uint)").to_s) group_key1 group_key2 count(int) sum(uint) + (uint8) (uint8) (int64) (uint64) 0 1 1 2 1 1 2 1 0 3 2 3 1 1 4 diff --git a/ruby/red-arrow/test/test-schema.rb b/ruby/red-arrow/test/test-schema.rb index 20d73b2726d6b..c4164d83903f2 100644 --- a/ruby/red-arrow/test/test-schema.rb +++ b/ruby/red-arrow/test/test-schema.rb @@ -95,7 +95,7 @@ def setup test("[invalid]") do invalid = [] - message = "field name or index must be String, Symbol or Integer" + message = +"field name or index must be String, Symbol or Integer" message << ": <#{invalid.inspect}>" assert_raise(ArgumentError.new(message)) do @schema[invalid] diff --git a/ruby/red-arrow/test/test-slicer.rb b/ruby/red-arrow/test/test-slicer.rb index d33748a387c8f..89cf34b0d13f7 100644 --- a/ruby/red-arrow/test/test-slicer.rb +++ b/ruby/red-arrow/test/test-slicer.rb @@ -45,11 +45,12 @@ def setup slicer.visible end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 8 true -2 16 true -3 256 true + count visible + (uint32) (bool) +0 1 true +1 8 true +2 16 true +3 256 true TABLE end @@ -58,15 +59,16 @@ def setup slicer.count end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 2 false -2 4 (null) -3 8 true -4 16 true -5 32 false -6 64 (null) -7 256 true + count visible + (uint32) (bool) +0 1 true +1 2 false +2 4 (null) +3 8 true +4 16 true +5 32 false +6 64 (null) +7 256 true TABLE end end @@ -77,9 +79,10 @@ def setup !slicer.visible end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 2 false -1 32 false + count visible + (uint32) (bool) +0 2 false +1 32 false TABLE end @@ -88,8 +91,9 @@ def setup !slicer.count end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) + count visible + (uint32) (bool) +0 0 (null) TABLE end end @@ -99,11 +103,12 @@ def setup slicer.visible.null? end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 4 (null) -2 64 (null) -3 (null) (null) + count visible + (uint32) (bool) +0 0 (null) +1 4 (null) +2 64 (null) +3 (null) (null) TABLE end @@ -112,13 +117,14 @@ def setup slicer.visible.valid? 
end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 2 false -2 8 true -3 16 true -4 32 false -5 256 true + count visible + (uint32) (bool) +0 1 true +1 2 false +2 8 true +3 16 true +4 32 false +5 256 true TABLE end @@ -128,11 +134,12 @@ def setup slicer.visible == nil end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 4 (null) -2 64 (null) -3 (null) (null) + count visible + (uint32) (bool) +0 0 (null) +1 4 (null) +2 64 (null) +3 (null) (null) TABLE end @@ -141,11 +148,12 @@ def setup slicer.visible == true end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 8 true -2 16 true -3 256 true + count visible + (uint32) (bool) +0 1 true +1 8 true +2 16 true +3 256 true TABLE end end @@ -156,13 +164,14 @@ def setup !(slicer.visible == nil) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 2 false -2 8 true -3 16 true -4 32 false -5 256 true + count visible + (uint32) (bool) +0 1 true +1 2 false +2 8 true +3 16 true +4 32 false +5 256 true TABLE end @@ -171,9 +180,10 @@ def setup !(slicer.visible == true) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 2 false -1 32 false + count visible + (uint32) (bool) +0 2 false +1 32 false TABLE end end @@ -184,13 +194,14 @@ def setup slicer.visible != nil end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 2 false -2 8 true -3 16 true -4 32 false -5 256 true + count visible + (uint32) (bool) +0 1 true +1 2 false +2 8 true +3 16 true +4 32 false +5 256 true TABLE end @@ -199,9 +210,10 @@ def setup slicer.visible != true end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 2 false -1 32 false + count visible + (uint32) (bool) +0 2 false +1 32 false TABLE end end @@ -211,12 +223,13 @@ def setup slicer.count < 16 end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 2 false -3 4 (null) -4 8 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 2 false +3 4 (null) +4 8 true TABLE end @@ -225,11 +238,12 @@ def setup !(slicer.count < 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 16 true -1 32 false -2 64 (null) -3 256 true + count visible + (uint32) (bool) +0 16 true +1 32 false +2 64 (null) +3 256 true TABLE end @@ -238,13 +252,14 @@ def setup slicer.count <= 16 end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 2 false -3 4 (null) -4 8 true -5 16 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 2 false +3 4 (null) +4 8 true +5 16 true TABLE end @@ -253,10 +268,11 @@ def setup !(slicer.count <= 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 32 false -1 64 (null) -2 256 true + count visible + (uint32) (bool) +0 32 false +1 64 (null) +2 256 true TABLE end @@ -265,10 +281,11 @@ def setup slicer.count > 16 end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 32 false -1 64 (null) -2 256 true + count visible + (uint32) (bool) +0 32 false +1 64 (null) +2 256 true TABLE end @@ -277,13 +294,14 @@ def setup !(slicer.count > 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 2 false -3 4 (null) -4 8 true -5 16 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 2 false +3 4 (null) +4 8 true +5 16 true TABLE end @@ -292,11 +310,12 @@ def setup slicer.count >= 16 end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 16 true -1 32 false -2 64 (null) -3 256 true + count visible + (uint32) (bool) +0 16 true +1 32 false +2 
64 (null) +3 256 true TABLE end @@ -305,12 +324,13 @@ def setup !(slicer.count >= 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 2 false -3 4 (null) -4 8 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 2 false +3 4 (null) +4 8 true TABLE end @@ -319,11 +339,12 @@ def setup slicer.count.in?([1, 4, 16, 64]) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 4 (null) -2 16 true -3 64 (null) + count visible + (uint32) (bool) +0 1 true +1 4 (null) +2 16 true +3 64 (null) TABLE end @@ -332,13 +353,14 @@ def setup !slicer.count.in?([1, 4, 16, 64]) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 2 false -2 8 true -3 32 false -4 (null) (null) -5 256 true + count visible + (uint32) (bool) +0 0 (null) +1 2 false +2 8 true +3 32 false +4 (null) (null) +5 256 true TABLE end @@ -347,9 +369,10 @@ def setup slicer.visible & (slicer.count >= 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 16 true -1 256 true + count visible + (uint32) (bool) +0 16 true +1 256 true TABLE end @@ -358,12 +381,13 @@ def setup slicer.visible | (slicer.count >= 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 8 true -2 16 true -3 32 false -4 256 true + count visible + (uint32) (bool) +0 1 true +1 8 true +2 16 true +3 32 false +4 256 true TABLE end @@ -372,10 +396,11 @@ def setup slicer.visible ^ (slicer.count >= 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 8 true -2 32 false + count visible + (uint32) (bool) +0 1 true +1 8 true +2 32 false TABLE end @@ -386,15 +411,16 @@ def setup end end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 4 (null) -3 8 true -4 16 true -5 64 (null) -6 (null) (null) -7 256 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 4 (null) +3 8 true +4 16 true +5 64 (null) +6 (null) (null) +7 256 true TABLE end @@ -405,9 +431,10 @@ def setup end end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 2 false -1 32 false + count visible + (uint32) (bool) +0 2 false +1 32 false TABLE end @@ -418,9 +445,10 @@ def setup end end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 2 false -1 32 false + count visible + (uint32) (bool) +0 2 false +1 32 false TABLE end @@ -431,15 +459,16 @@ def setup end end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 4 (null) -3 8 true -4 16 true -5 64 (null) -6 (null) (null) -7 256 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 4 (null) +3 8 true +4 16 true +5 64 (null) +6 (null) (null) +7 256 true TABLE end @@ -456,6 +485,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 Arrow 1 window TABLE @@ -467,6 +497,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 array 1 Arrow TABLE @@ -478,6 +509,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 array 1 carrot TABLE @@ -489,6 +521,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 array 1 Arrow 2 carrot @@ -501,6 +534,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 Arrow 1 window TABLE @@ -512,6 +546,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 Arrow 1 window TABLE @@ -523,6 +558,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 array 1 Arrow 2 carrot @@ -545,6 +581,7 @@ def setup end assert_equal(<<~TABLE, 
sliced_table.to_s) string + (utf8) 0 carrot TABLE end diff --git a/ruby/red-arrow/test/test-struct-data-type.rb b/ruby/red-arrow/test/test-struct-data-type.rb index d106e38b1d841..9bf9a17dd645a 100644 --- a/ruby/red-arrow/test/test-struct-data-type.rb +++ b/ruby/red-arrow/test/test-struct-data-type.rb @@ -101,7 +101,7 @@ def setup test("[invalid]") do invalid = [] - message = "field name or index must be String, Symbol or Integer" + message = +"field name or index must be String, Symbol or Integer" message << ": <#{invalid.inspect}>" assert_raise(ArgumentError.new(message)) do @data_type[invalid] diff --git a/ruby/red-arrow/test/test-table.rb b/ruby/red-arrow/test/test-table.rb index 883cf70c269bb..a69e926156809 100644 --- a/ruby/red-arrow/test/test-table.rb +++ b/ruby/red-arrow/test/test-table.rb @@ -87,24 +87,26 @@ def array_like.to_ary target_rows_raw = [nil, true, true, false, true, false, true, true] target_rows = Arrow::BooleanArray.new(target_rows_raw) assert_equal(<<-TABLE, @table.slice(target_rows).to_s) - count visible -0 2 false -1 4 (null) -2 16 true -3 64 (null) -4 128 (null) + count visible + (uint8) (bool) +0 2 false +1 4 (null) +2 16 true +3 64 (null) +4 128 (null) TABLE end test("Array: boolean") do target_rows_raw = [nil, true, true, false, true, false, true, true] assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s) - count visible -0 2 false -1 4 (null) -2 16 true -3 64 (null) -4 128 (null) + count visible + (uint8) (bool) +0 2 false +1 4 (null) +2 16 true +3 64 (null) +4 128 (null) TABLE end @@ -131,83 +133,93 @@ def array_like.to_ary test("Range: positive: include end") do assert_equal(<<-TABLE, @table.slice(2..4).to_s) - count visible -0 4 (null) -1 8 true -2 16 true + count visible + (uint8) (bool) +0 4 (null) +1 8 true +2 16 true TABLE end test("Range: positive: exclude end") do assert_equal(<<-TABLE, @table.slice(2...4).to_s) - count visible -0 4 (null) -1 8 true + count visible + (uint8) (bool) +0 4 (null) +1 8 true TABLE end test("Range: negative: include end") do assert_equal(<<-TABLE, @table.slice(-4..-2).to_s) - count visible -0 16 true -1 32 false -2 64 (null) + count visible + (uint8) (bool) +0 16 true +1 32 false +2 64 (null) TABLE end test("Range: negative: exclude end") do assert_equal(<<-TABLE, @table.slice(-4...-2).to_s) - count visible -0 16 true -1 32 false + count visible + (uint8) (bool) +0 16 true +1 32 false TABLE end test("[from, to]: positive") do assert_equal(<<-TABLE, @table.slice(0, 2).to_s) - count visible -0 1 true -1 2 false + count visible + (uint8) (bool) +0 1 true +1 2 false TABLE end test("[from, to]: negative") do assert_equal(<<-TABLE, @table.slice(-4, 2).to_s) - count visible -0 16 true -1 32 false + count visible + (uint8) (bool) +0 16 true +1 32 false TABLE end test("{key: Number}") do assert_equal(<<-TABLE, @table.slice(count: 16).to_s) - count visible -0 16 true + count visible + (uint8) (bool) +0 16 true TABLE end test("{key: String}") do table = Arrow::Table.new(name: Arrow::StringArray.new(["a", "b", "c"])) assert_equal(<<-TABLE, table.slice(name: 'b').to_s) - name -0 b + name + (utf8) +0 b TABLE end test("{key: true}") do assert_equal(<<-TABLE, @table.slice(visible: true).to_s) - count visible -0 1 true -1 8 true -2 16 true + count visible + (uint8) (bool) +0 1 true +1 8 true +2 16 true TABLE end test("{key: false}") do assert_equal(<<-TABLE, @table.slice(visible: false).to_s) - count visible -0 2 false -1 32 false + count visible + (uint8) (bool) +0 2 false +1 32 false TABLE end @@ -218,11 +230,12 @@ def 
array_like.to_ary omit("beginless range isn't supported") end assert_equal(<<-TABLE, @table.slice(count: range).to_s) - count visible -0 1 true -1 2 false -2 4 (null) -3 8 true + count visible + (uint8) (bool) +0 1 true +1 2 false +2 4 (null) +3 8 true TABLE end @@ -233,10 +246,11 @@ def array_like.to_ary omit("beginless range isn't supported") end assert_equal(<<-TABLE, @table.slice(count: range).to_s) - count visible -0 1 true -1 2 false -2 4 (null) + count visible + (uint8) (bool) +0 1 true +1 2 false +2 4 (null) TABLE end @@ -247,39 +261,43 @@ def array_like.to_ary omit("endless range isn't supported") end assert_equal(<<-TABLE, @table.slice(count: range).to_s) - count visible -0 16 true -1 32 false -2 64 (null) -3 128 (null) + count visible + (uint8) (bool) +0 16 true +1 32 false +2 64 (null) +3 128 (null) TABLE end test("{key: Range}: include end") do assert_equal(<<-TABLE, @table.slice(count: 1..16).to_s) - count visible -0 1 true -1 2 false -2 4 (null) -3 8 true -4 16 true + count visible + (uint8) (bool) +0 1 true +1 2 false +2 4 (null) +3 8 true +4 16 true TABLE end test("{key: Range}: exclude end") do assert_equal(<<-TABLE, @table.slice(count: 1...16).to_s) - count visible -0 1 true -1 2 false -2 4 (null) -3 8 true + count visible + (uint8) (bool) +0 1 true +1 2 false +2 4 (null) +3 8 true TABLE end test("{key1: Range, key2: true}") do assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s) - count visible -0 2 false + count visible + (uint8) (bool) +0 2 false TABLE end @@ -372,44 +390,47 @@ def setup test("add") do name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"]) assert_equal(<<-TABLE, @table.merge(:name => name_array).to_s) - count visible name -0 1 true a -1 2 false b -2 4 (null) c -3 8 true d -4 16 true e -5 32 false f -6 64 (null) g -7 128 (null) h + count visible name + (uint8) (bool) (utf8) +0 1 true a +1 2 false b +2 4 (null) c +3 8 true d +4 16 true e +5 32 false f +6 64 (null) g +7 128 (null) h TABLE end test("remove") do assert_equal(<<-TABLE, @table.merge(:visible => nil).to_s) - count -0 1 -1 2 -2 4 -3 8 -4 16 -5 32 -6 64 -7 128 + count + (uint8) +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 TABLE end test("replace") do visible_array = Arrow::Int32Array.new([1] * @visible_array.length) assert_equal(<<-TABLE, @table.merge(:visible => visible_array).to_s) - count visible -0 1 1 -1 2 1 -2 4 1 -3 8 1 -4 16 1 -5 32 1 -6 64 1 -7 128 1 + count visible + (uint8) (int32) +0 1 1 +1 2 1 +2 4 1 +3 8 1 +4 16 1 +5 32 1 +6 64 1 +7 128 1 TABLE end end @@ -419,15 +440,16 @@ def setup name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"]) table = Arrow::Table.new("name" => name_array) assert_equal(<<-TABLE, @table.merge(table).to_s) - count visible name -0 1 true a -1 2 false b -2 4 (null) c -3 8 true d -4 16 true e -5 32 false f -6 64 (null) g -7 128 (null) h + count visible name + (uint8) (bool) (utf8) +0 1 true a +1 2 false b +2 4 (null) c +3 8 true d +4 16 true e +5 32 false f +6 64 (null) g +7 128 (null) h TABLE end @@ -435,15 +457,16 @@ def setup visible_array = Arrow::Int32Array.new([1] * @visible_array.length) table = Arrow::Table.new("visible" => visible_array) assert_equal(<<-TABLE, @table.merge(table).to_s) - count visible -0 1 1 -1 2 1 -2 4 1 -3 8 1 -4 16 1 -5 32 1 -6 64 1 -7 128 1 + count visible + (uint8) (int32) +0 1 1 +1 2 1 +2 4 1 +3 8 1 +4 16 1 +5 32 1 +6 64 1 +7 128 1 TABLE end end @@ -457,29 +480,31 @@ def setup sub_test_case("#remove_column") do test("String") do assert_equal(<<-TABLE, 
@table.remove_column("visible").to_s) - count -0 1 -1 2 -2 4 -3 8 -4 16 -5 32 -6 64 -7 128 + count + (uint8) +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 TABLE end test("Symbol") do assert_equal(<<-TABLE, @table.remove_column(:visible).to_s) - count -0 1 -1 2 -2 4 -3 8 -4 16 -5 32 -6 64 -7 128 + count + (uint8) +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 TABLE end @@ -491,29 +516,31 @@ def setup test("Integer") do assert_equal(<<-TABLE, @table.remove_column(1).to_s) - count -0 1 -1 2 -2 4 -3 8 -4 16 -5 32 -6 64 -7 128 + count + (uint8) +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 TABLE end test("negative integer") do assert_equal(<<-TABLE, @table.remove_column(-1).to_s) - count -0 1 -1 2 -2 4 -3 8 -4 16 -5 32 -6 64 -7 128 + count + (uint8) +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 TABLE end @@ -544,29 +571,33 @@ def setup test("names") do assert_equal(<<-TABLE, @table.select_columns(:c, :a).to_s) - c a -0 1 1 + c a + (uint8) (uint8) +0 1 1 TABLE end test("range") do assert_equal(<<-TABLE, @table.select_columns(2...4).to_s) - c d -0 1 1 + c d + (uint8) (uint8) +0 1 1 TABLE end test("indexes") do assert_equal(<<-TABLE, @table.select_columns(0, -1, 2).to_s) - a e c -0 1 1 1 + a e c + (uint8) (uint8) (uint8) +0 1 1 1 TABLE end test("mixed") do assert_equal(<<-TABLE, @table.select_columns(:a, -1, 2..3).to_s) - a e c d -0 1 1 1 1 + a e c d + (uint8) (uint8) (uint8) (uint8) +0 1 1 1 1 TABLE end @@ -575,8 +606,9 @@ def setup column.name == "a" or i.odd? end assert_equal(<<-TABLE, selected_table.to_s) - a b d -0 1 1 1 + a b d + (uint8) (uint8) (uint8) +0 1 1 1 TABLE end @@ -585,15 +617,17 @@ def setup column.name == "a" end assert_equal(<<-TABLE, selected_table.to_s) - a -0 1 + a + (uint8) +0 1 TABLE end test("empty result") do selected_table = @table.filter([false] * @table.size).select_columns(:a) assert_equal(<<-TABLE, selected_table.to_s) - a + a + (uint8) TABLE end end @@ -682,7 +716,7 @@ def test_json output = create_output(".json") # TODO: Implement this. 
# @table.save(output, format: :json) - columns = "" + columns = +"" @table.each_record.each do |record| column = { "count" => record.count, @@ -789,10 +823,11 @@ def create_output(extension) path = fixture_path("with-header.csv") table = Arrow::Table.load(path, skip_lines: /^\#/) assert_equal(<<-TABLE, table.to_s) - name score -0 alice 10 -1 bob 29 -2 chris -1 + name score + (utf8) (int8) +0 alice 10 +1 bob 29 +2 chris -1 TABLE end @@ -808,10 +843,11 @@ def create_output(extension) CSV end assert_equal(<<-TABLE, Arrow::Table.load(file.path).to_s) - name score -0 alice 10 -1 bob 29 -2 chris -1 + name score + (utf8) (int64) +0 alice 10 +1 bob 29 +2 chris -1 TABLE end @@ -826,10 +862,11 @@ def create_output(extension) file.close table = Arrow::Table.load(file.path) assert_equal(<<-TABLE, table.to_s) - name score -0 alice 10 -1 bob 29 -2 chris -1 + name score + (utf8) (int64) +0 alice 10 +1 bob 29 +2 chris -1 TABLE end end @@ -881,7 +918,7 @@ def test_http(data) output.data.to_s, content_type) do |port| input = URI("http://127.0.0.1:#{port}#{path}") - loaded_table = Arrow::Table.load(input) + loaded_table = Arrow::Table.load(input, schema: @table.schema) assert_equal(@table.to_s, loaded_table.to_s) end end @@ -962,15 +999,16 @@ def test_join packed_table = @table.pack column_n_chunks = packed_table.columns.collect {|c| c.data.n_chunks} assert_equal([[1, 1], <<-TABLE], [column_n_chunks, packed_table.to_s]) - count visible -0 1 true -1 2 false -2 4 (null) -3 8 true -4 16 true -5 32 false -6 64 (null) -7 128 (null) + count visible + (uint8) (bool) +0 1 true +1 2 false +2 4 (null) +3 8 true +4 16 true +5 32 false +6 64 (null) +7 128 (null) TABLE end @@ -1009,19 +1047,20 @@ def setup test(":list") do assert_equal(<<-TABLE, @table.to_s(format: :list)) ==================== 0 ==================== -count: 1 -visible: true +count(uint8): 1 +visible(bool): true ==================== 1 ==================== -count: 2 -visible: false +count(uint8): 2 +visible(bool): false TABLE end test(":table") do assert_equal(<<-TABLE, @table.to_s(format: :table)) - count visible -0 1 true -1 2 false + count visible + (uint8) (bool) +0 1 true +1 2 false TABLE end @@ -1033,6 +1072,35 @@ def setup end end + sub_test_case(":show_column_type") do + def setup + columns = { + "count" => Arrow::UInt8Array.new([1, 2]), + "visible" => Arrow::BooleanArray.new([true, false]), + } + @table = Arrow::Table.new(columns) + end + + test(":list") do + assert_equal(<<-TABLE, @table.to_s(format: :list, show_column_type: false)) +==================== 0 ==================== +count: 1 +visible: true +==================== 1 ==================== +count: 2 +visible: false + TABLE + end + + test(":table") do + assert_equal(<<-TABLE, @table.to_s(format: :table, show_column_type: false)) + count visible +0 1 true +1 2 false + TABLE + end + end + sub_test_case("#==") do test("Arrow::Table") do assert do @@ -1058,13 +1126,14 @@ def setup test("Array: boolean") do filter = [nil, true, true, false, true, false, true, true] assert_equal(<<-TABLE, @table.filter(filter, @options).to_s) - count visible -0 (null) (null) -1 2 false -2 4 (null) -3 16 true -4 64 (null) -5 128 (null) + count visible + (uint8) (bool) +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) TABLE end @@ -1072,13 +1141,14 @@ def setup array = [nil, true, true, false, true, false, true, true] filter = Arrow::BooleanArray.new(array) assert_equal(<<-TABLE, @table.filter(filter, @options).to_s) - count visible -0 (null) (null) -1 2 false -2 4 (null) -3 16 true -4 64 
(null) -5 128 (null) + count visible + (uint8) (bool) +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) TABLE end @@ -1090,13 +1160,14 @@ def setup ] filter = Arrow::ChunkedArray.new(filter_chunks) assert_equal(<<-TABLE, @table.filter(filter, @options).to_s) - count visible -0 (null) (null) -1 2 false -2 4 (null) -3 16 true -4 64 (null) -5 128 (null) + count visible + (uint8) (bool) +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) TABLE end end @@ -1105,20 +1176,22 @@ def setup test("Arrow: boolean") do indices = [1, 0, 2] assert_equal(<<-TABLE, @table.take(indices).to_s) - count visible -0 2 false -1 1 true -2 4 (null) + count visible + (uint8) (bool) +0 2 false +1 1 true +2 4 (null) TABLE end test("Arrow::Array") do indices = Arrow::Int16Array.new([1, 0, 2]) assert_equal(<<-TABLE, @table.take(indices).to_s) - count visible -0 2 false -1 1 true -2 4 (null) + count visible + (uint8) (bool) +0 2 false +1 1 true +2 4 (null) TABLE end @@ -1129,10 +1202,11 @@ def setup ] indices = Arrow::ChunkedArray.new(chunks) assert_equal(<<-TABLE, @table.take(indices).to_s) - count visible -0 2 false -1 1 true -2 4 (null) + count visible + (uint8) (bool) +0 2 false +1 1 true +2 4 (null) TABLE end end @@ -1144,9 +1218,10 @@ def setup table2 = Arrow::Table.new(b: [false]) concatenated = table1.concatenate([table2], unify_schemas: true) assert_equal(<<-TABLE, concatenated.to_s) - a b -0 true false -1 (null) false + a b + (bool) (bool) +0 true false +1 (null) false TABLE end end diff --git a/swift/.swiftlint.yml b/swift/.swiftlint.yml index d447bf9d5d97c..7e4da29f3741c 100644 --- a/swift/.swiftlint.yml +++ b/swift/.swiftlint.yml @@ -16,10 +16,14 @@ # under the License. included: + - Arrow/Package.swift - Arrow/Sources - Arrow/Tests + - ArrowFlight/Package.swift - ArrowFlight/Sources - ArrowFlight/Tests + - CDataWGo/Package.swift + - CDataWGo/Sources/go-swift excluded: - Arrow/Sources/Arrow/File_generated.swift - Arrow/Sources/Arrow/Message_generated.swift diff --git a/swift/Arrow/Package.swift b/swift/Arrow/Package.swift index 946eb999c798a..6f19136fd4292 100644 --- a/swift/Arrow/Package.swift +++ b/swift/Arrow/Package.swift @@ -26,28 +26,34 @@ let package = Package( .macOS(.v10_14) ], products: [ - // Products define the executables and libraries a package produces, and make them visible to other packages. .library( name: "Arrow", - targets: ["Arrow"]), + targets: ["Arrow"]) ], dependencies: [ // The latest version of flatbuffers v23.5.26 was built in May 26, 2023 // and therefore doesn't include the unaligned buffer swift changes. // This can be changed back to using the tag once a new version of // flatbuffers has been released. - .package(url: "https://github.com/google/flatbuffers.git", branch: "master") + .package(url: "https://github.com/google/flatbuffers.git", branch: "master"), + .package( + url: "https://github.com/apple/swift-atomics.git", + .upToNextMajor(from: "1.2.0") // or `.upToNextMinor + ) ], targets: [ - // Targets are the basic building blocks of a package. A target can define a module or a test suite. - // Targets can depend on other targets in this package, and on products in packages this package depends on. 
+ .target( + name: "ArrowC", + path: "Sources/ArrowC" + ), .target( name: "Arrow", - dependencies: [ - .product(name: "FlatBuffers", package: "flatbuffers") + dependencies: ["ArrowC", + .product(name: "FlatBuffers", package: "flatbuffers"), + .product(name: "Atomics", package: "swift-atomics") ]), .testTarget( name: "ArrowTests", - dependencies: ["Arrow"]), + dependencies: ["Arrow", "ArrowC"]) ] ) diff --git a/swift/Arrow/Sources/Arrow/ArrowArray.swift b/swift/Arrow/Sources/Arrow/ArrowArray.swift index 88b43e63a92b7..32b6ba1704511 100644 --- a/swift/Arrow/Sources/Arrow/ArrowArray.swift +++ b/swift/Arrow/Sources/Arrow/ArrowArray.swift @@ -17,16 +17,29 @@ import Foundation -public class ArrowArrayHolder { +public protocol ArrowArrayHolder { + var type: ArrowType {get} + var length: UInt {get} + var nullCount: UInt {get} + var array: Any {get} + var data: ArrowData {get} + var getBufferData: () -> [Data] {get} + var getBufferDataSizes: () -> [Int] {get} + var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn {get} +} + +public class ArrowArrayHolderImpl: ArrowArrayHolder { + public let array: Any + public let data: ArrowData public let type: ArrowType public let length: UInt public let nullCount: UInt - public let array: Any public let getBufferData: () -> [Data] public let getBufferDataSizes: () -> [Int] - private let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn + public let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn public init(_ arrowArray: ArrowArray) { self.array = arrowArray + self.data = arrowArray.arrowData self.length = arrowArray.length self.type = arrowArray.arrowData.type self.nullCount = arrowArray.nullCount @@ -60,19 +73,9 @@ public class ArrowArrayHolder { return ArrowColumn(field, chunked: ChunkedArrayHolder(try ChunkedArray(arrays))) } } - - public static func makeArrowColumn(_ field: ArrowField, - holders: [ArrowArrayHolder] - ) -> Result { - do { - return .success(try holders[0].getArrowColumn(field, holders)) - } catch { - return .failure(.runtimeError("\(error)")) - } - } } -public class ArrowArray: AsString { +public class ArrowArray: AsString, AnyArray { public typealias ItemType = T public let arrowData: ArrowData public var nullCount: UInt {return self.arrowData.nullCount} @@ -101,6 +104,14 @@ public class ArrowArray: AsString { return "\(self[index]!)" } + + public func asAny(_ index: UInt) -> Any? { + if self[index] == nil { + return nil + } + + return self[index]! + } } public class FixedArray: ArrowArray { diff --git a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift index b78f0ccd74997..40f9628d8f162 100644 --- a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift @@ -17,7 +17,12 @@ import Foundation -public class ArrowArrayBuilder> { +public protocol ArrowArrayHolderBuilder { + func toHolder() throws -> ArrowArrayHolder + func appendAny(_ val: Any?) +} + +public class ArrowArrayBuilder>: ArrowArrayHolderBuilder { let type: ArrowType let bufferBuilder: T public var length: UInt {return self.bufferBuilder.length} @@ -30,10 +35,26 @@ public class ArrowArrayBuilder> self.bufferBuilder = try T() } + public func append(_ vals: T.ItemType?...) { + for val in vals { + self.bufferBuilder.append(val) + } + } + + public func append(_ vals: [T.ItemType?]) { + for val in vals { + self.bufferBuilder.append(val) + } + } + public func append(_ val: T.ItemType?) 
{ self.bufferBuilder.append(val) } + public func appendAny(_ val: Any?) { + self.bufferBuilder.append(val as? T.ItemType) + } + public func finish() throws -> ArrowArray { let buffers = self.bufferBuilder.finish() let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount) @@ -43,6 +64,10 @@ public class ArrowArrayBuilder> public func getStride() -> Int { return self.type.getStride() } + + public func toHolder() throws -> ArrowArrayHolder { + return try ArrowArrayHolderImpl(self.finish()) + } } public class NumberArrayBuilder: ArrowArrayBuilder, FixedArray> { diff --git a/swift/Arrow/Sources/Arrow/ArrowBuffer.swift b/swift/Arrow/Sources/Arrow/ArrowBuffer.swift index 4ac4eb93c91db..1ff53cd7dd5d9 100644 --- a/swift/Arrow/Sources/Arrow/ArrowBuffer.swift +++ b/swift/Arrow/Sources/Arrow/ArrowBuffer.swift @@ -22,16 +22,20 @@ public class ArrowBuffer { static let maxLength = UInt.max fileprivate(set) var length: UInt let capacity: UInt - let rawPointer: UnsafeMutableRawPointer + public let rawPointer: UnsafeMutableRawPointer + let isMemoryOwner: Bool - init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer) { + init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer, isMemoryOwner: Bool = true) { self.length = length self.capacity = capacity self.rawPointer = rawPointer + self.isMemoryOwner = isMemoryOwner } deinit { - self.rawPointer.deallocate() + if isMemoryOwner { + self.rawPointer.deallocate() + } } func append(to data: inout Data) { @@ -39,6 +43,13 @@ public class ArrowBuffer { data.append(ptr, count: Int(capacity)) } + static func createEmptyBuffer() -> ArrowBuffer { + return ArrowBuffer( + length: 0, + capacity: 0, + rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)) + } + static func createBuffer(_ data: [UInt8], length: UInt) -> ArrowBuffer { let byteCount = UInt(data.count) let capacity = alignTo64(byteCount) diff --git a/swift/Arrow/Sources/Arrow/ArrowCExporter.swift b/swift/Arrow/Sources/Arrow/ArrowCExporter.swift new file mode 100644 index 0000000000000..aa93f0cb7e389 --- /dev/null +++ b/swift/Arrow/Sources/Arrow/ArrowCExporter.swift @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import Foundation +import ArrowC +import Atomics + +// The memory used by UnsafeAtomic is not automatically +// reclaimed. 
Since this value is initialized once +// and used until the program/app is closed its +// memory will be released on program/app exit +let exportDataCounter: UnsafeAtomic = .create(0) + +public class ArrowCExporter { + private class ExportData { + let id: Int + init() { + id = exportDataCounter.loadThenWrappingIncrement(ordering: .relaxed) + ArrowCExporter.exportedData[id] = self + } + } + + private class ExportSchema: ExportData { + public let arrowTypeNameCstr: UnsafePointer + public let nameCstr: UnsafePointer + private let arrowType: ArrowType + private let name: String + private let arrowTypeName: String + init(_ arrowType: ArrowType, name: String = "") throws { + self.arrowType = arrowType + // keeping the name str to ensure the cstring buffer remains valid + self.name = name + self.arrowTypeName = try arrowType.cDataFormatId + self.nameCstr = (self.name as NSString).utf8String! + self.arrowTypeNameCstr = (self.arrowTypeName as NSString).utf8String! + super.init() + } + } + + private class ExportArray: ExportData { + private let arrowData: ArrowData + private(set) var data = [UnsafeRawPointer?]() + private(set) var buffers: UnsafeMutablePointer + init(_ arrowData: ArrowData) { + // keep a reference to the ArrowData + // obj so the memory doesn't get + // deallocated + self.arrowData = arrowData + for arrowBuffer in arrowData.buffers { + data.append(arrowBuffer.rawPointer) + } + + self.buffers = UnsafeMutablePointer(mutating: data) + super.init() + } + } + + private static var exportedData = [Int: ExportData]() + public init() {} + + public func exportType(_ cSchema: inout ArrowC.ArrowSchema, arrowType: ArrowType, name: String = "") -> + Result { + do { + let exportSchema = try ExportSchema(arrowType, name: name) + cSchema.format = exportSchema.arrowTypeNameCstr + cSchema.name = exportSchema.nameCstr + cSchema.private_data = + UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: exportSchema.id)) + cSchema.release = {(data: UnsafeMutablePointer?) in + let arraySchema = data!.pointee + let exportId = Int(bitPattern: arraySchema.private_data) + guard ArrowCExporter.exportedData[exportId] != nil else { + fatalError("Export schema not found with id \(exportId)") + } + + // the data associated with this exportSchema object + // which includes the C strings for the format and name will + // be deallocated upon removal + ArrowCExporter.exportedData.removeValue(forKey: exportId) + ArrowC.ArrowSwiftClearReleaseSchema(data) + } + } catch { + return .failure(.unknownError("\(error)")) + } + return .success(true) + } + + public func exportField(_ schema: inout ArrowC.ArrowSchema, field: ArrowField) -> + Result { + return exportType(&schema, arrowType: field.type, name: field.name) + } + + public func exportArray(_ cArray: inout ArrowC.ArrowArray, arrowData: ArrowData) { + let exportArray = ExportArray(arrowData) + cArray.buffers = exportArray.buffers + cArray.length = Int64(arrowData.length) + cArray.null_count = Int64(arrowData.nullCount) + cArray.n_buffers = Int64(arrowData.buffers.count) + // Swift Arrow does not currently support children or dictionaries + // This will need to be updated once support has been added + cArray.n_children = 0 + cArray.children = nil + cArray.dictionary = nil + cArray.private_data = + UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: exportArray.id)) + cArray.release = {(data: UnsafeMutablePointer?)
in + let arrayData = data!.pointee + let exportId = Int(bitPattern: arrayData.private_data) + guard ArrowCExporter.exportedData[exportId] != nil else { + fatalError("Export data not found with id \(exportId)") + } + + // the data associated with this exportArray object + // which includes the entire arrowData object + // and the buffers UnsafeMutablePointer[] will + // be deallocated upon removal + ArrowCExporter.exportedData.removeValue(forKey: exportId) + ArrowC.ArrowSwiftClearReleaseArray(data) + } + } +} diff --git a/swift/Arrow/Sources/Arrow/ArrowCImporter.swift b/swift/Arrow/Sources/Arrow/ArrowCImporter.swift new file mode 100644 index 0000000000000..8a4cf47fc0b43 --- /dev/null +++ b/swift/Arrow/Sources/Arrow/ArrowCImporter.swift @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import Foundation +import ArrowC + +public class ImportArrayHolder: ArrowArrayHolder { + let cArrayPtr: UnsafePointer + public var type: ArrowType {self.holder.type} + public var length: UInt {self.holder.length} + public var nullCount: UInt {self.holder.nullCount} + public var array: Any {self.holder.array} + public var data: ArrowData {self.holder.data} + public var getBufferData: () -> [Data] {self.holder.getBufferData} + public var getBufferDataSizes: () -> [Int] {self.holder.getBufferDataSizes} + public var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn {self.holder.getArrowColumn} + private let holder: ArrowArrayHolder + init(_ holder: ArrowArrayHolder, cArrayPtr: UnsafePointer) { + self.cArrayPtr = cArrayPtr + self.holder = holder + } + + deinit { + if self.cArrayPtr.pointee.release != nil { + ArrowCImporter.release(self.cArrayPtr) + } + } +} + +public class ArrowCImporter { + private func appendToBuffer( + _ cBuffer: UnsafeRawPointer?, + arrowBuffers: inout [ArrowBuffer], + length: UInt) { + if cBuffer == nil { + arrowBuffers.append(ArrowBuffer.createEmptyBuffer()) + return + } + + let pointer = UnsafeMutableRawPointer(mutating: cBuffer)! 
+ arrowBuffers.append( + ArrowBuffer(length: length, capacity: length, rawPointer: pointer, isMemoryOwner: false)) + } + + public init() {} + + public func importType(_ cArrow: String, name: String = "") -> + Result { + do { + let type = try ArrowType.fromCDataFormatId(cArrow) + return .success(ArrowField(name, type: ArrowType(type.info), isNullable: true)) + } catch { + return .failure(.invalid("Error occurred while attempting to import type: \(error)")) + } + } + + public func importField(_ cSchema: ArrowC.ArrowSchema) -> + Result { + if cSchema.n_children > 0 { + ArrowCImporter.release(cSchema) + return .failure(.invalid("Children currently not supported")) + } else if cSchema.dictionary != nil { + ArrowCImporter.release(cSchema) + return .failure(.invalid("Dictionary types currently not supported")) + } + + switch importType( + String(cString: cSchema.format), name: String(cString: cSchema.name)) { + case .success(let field): + ArrowCImporter.release(cSchema) + return .success(field) + case .failure(let err): + ArrowCImporter.release(cSchema) + return .failure(err) + } + } + + public func importArray( + _ cArray: UnsafePointer, + arrowType: ArrowType, + isNullable: Bool = false + ) -> Result { + let arrowField = ArrowField("", type: arrowType, isNullable: isNullable) + return importArray(cArray, arrowField: arrowField) + } + + public func importArray( // swiftlint:disable:this cyclomatic_complexity function_body_length + _ cArrayPtr: UnsafePointer, + arrowField: ArrowField + ) -> Result { + let cArray = cArrayPtr.pointee + if cArray.null_count < 0 { + ArrowCImporter.release(cArrayPtr) + return .failure(.invalid("Uncomputed null count is not supported")) + } else if cArray.n_children > 0 { + ArrowCImporter.release(cArrayPtr) + return .failure(.invalid("Children currently not supported")) + } else if cArray.dictionary != nil { + ArrowCImporter.release(cArrayPtr) + return .failure(.invalid("Dictionary types currently not supported")) + } else if cArray.offset != 0 { + ArrowCImporter.release(cArrayPtr) + return .failure(.invalid("Offset of 0 is required but found offset: \(cArray.offset)")) + } + + let arrowType = arrowField.type + let length = UInt(cArray.length) + let nullCount = UInt(cArray.null_count) + var arrowBuffers = [ArrowBuffer]() + + if cArray.n_buffers > 0 { + if cArray.buffers == nil { + ArrowCImporter.release(cArrayPtr) + return .failure(.invalid("C array buffers is nil")) + } + + switch arrowType.info { + case .variableInfo: + if cArray.n_buffers != 3 { + ArrowCImporter.release(cArrayPtr) + return .failure( + .invalid("Variable buffer count expected 3 but found \(cArray.n_buffers)")) + } + + appendToBuffer(cArray.buffers[0], arrowBuffers: &arrowBuffers, length: UInt(ceil(Double(length) / 8))) + appendToBuffer(cArray.buffers[1], arrowBuffers: &arrowBuffers, length: length) + let lastOffsetLength = cArray.buffers[1]!
+ .advanced(by: Int(length) * MemoryLayout.stride) + .load(as: Int32.self) + appendToBuffer(cArray.buffers[2], arrowBuffers: &arrowBuffers, length: UInt(lastOffsetLength)) + default: + if cArray.n_buffers != 2 { + ArrowCImporter.release(cArrayPtr) + return .failure(.invalid("Expected buffer count 2 but found \(cArray.n_buffers)")) + } + + appendToBuffer(cArray.buffers[0], arrowBuffers: &arrowBuffers, length: UInt(ceil(Double(length) / 8))) + appendToBuffer(cArray.buffers[1], arrowBuffers: &arrowBuffers, length: length) + } + } + + switch makeArrayHolder(arrowField, buffers: arrowBuffers, nullCount: nullCount) { + case .success(let holder): + return .success(ImportArrayHolder(holder, cArrayPtr: cArrayPtr)) + case .failure(let err): + ArrowCImporter.release(cArrayPtr) + return .failure(err) + } + } + + public static func release(_ cArrayPtr: UnsafePointer) { + if cArrayPtr.pointee.release != nil { + let cSchemaMutablePtr = UnsafeMutablePointer(mutating: cArrayPtr) + cArrayPtr.pointee.release(cSchemaMutablePtr) + } + } + + public static func release(_ cSchema: ArrowC.ArrowSchema) { + if cSchema.release != nil { + let cSchemaPtr = UnsafeMutablePointer.allocate(capacity: 1) + cSchemaPtr.initialize(to: cSchema) + cSchema.release(cSchemaPtr) + } + } +} diff --git a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift new file mode 100644 index 0000000000000..9aa8a65137d28 --- /dev/null +++ b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift @@ -0,0 +1,368 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
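The `ArrowCExporter` and `ArrowCImporter` added above are the two halves of the new Swift support for the Arrow C Data Interface: the exporter fills in `ArrowC.ArrowSchema`/`ArrowC.ArrowArray` structs and keeps the backing Swift objects alive in `exportedData` until the consumer calls `release`, while the importer wraps C-side structs without copying buffers (`isMemoryOwner: false`). A minimal schema round-trip sketch, using only the `exportType` and `importField` entry points shown in this patch; the zero-initialized `ArrowC.ArrowSchema()` and the print-based error handling are assumptions for illustration:

```
import Arrow
import ArrowC

// Export a Swift Arrow type into a C ArrowSchema, then import it back.
var cSchema = ArrowC.ArrowSchema()  // zero-initialized C struct (assumed)
let exporter = ArrowCExporter()

switch exporter.exportType(&cSchema, arrowType: ArrowType(ArrowType.ArrowString), name: "name") {
case .success:
    // importField reads the format/name C strings and releases the schema.
    switch ArrowCImporter().importField(cSchema) {
    case .success(let field):
        print("imported field: \(field.name)")  // "name"
    case .failure(let error):
        print("import failed: \(error)")
    }
case .failure(let error):
    print("export failed: \(error)")
}
```

Array export works the same way through `exportArray`, which hands the consumer borrowed buffer pointers rather than copies and defers deallocation to the `release` callback.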
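Together with the `ArrowArrayHolderBuilder` conformance added to `ArrowArrayBuilder` above (the variadic and array `append` overloads, `appendAny`, and `toHolder()`) and the `ArrowDecoder` defined in the hunks that follow, this patch gives Swift callers a typed path from building columns to reading rows. A sketch under stated assumptions: the `NumberArrayBuilder<Int32>` generic signature and an already-populated `RecordBatch` are assumed here, since builder and batch construction are not part of this patch:

```
import Foundation
import Arrow

// Fill a column through the new append overloads and wrap it in a holder.
func fillColumn(_ builder: NumberArrayBuilder<Int32>) throws -> ArrowArrayHolder {
    builder.append(1, 2, 3)        // new variadic overload
    builder.append([4, nil, 6])    // new array overload, nil appends a null
    builder.appendAny(Int32(7))    // type-erased append from ArrowArrayHolderBuilder
    return try builder.toHolder()  // finish() wrapped in an ArrowArrayHolderImpl
}

// Row type for decoding; property names must match the batch's column names.
struct Row: Decodable {
    let name: String
    let score: Double?
}

// Assumed: `rb` comes from elsewhere, e.g. an IPC reader.
func rows(from rb: RecordBatch) throws -> [Row] {
    return try ArrowDecoder(rb).decode(Row.self)
}
```

Note that `appendAny` performs an `as?` cast to the builder's element type, so passing a plain Swift `Int` to an `Int32` builder would silently append null; the decoder is stricter and rejects `Int`/`UInt` outright in favor of the sized integer types.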
+
+import Foundation
+
+public class ArrowDecoder: Decoder {
+    var rbIndex: UInt = 0
+    public var codingPath: [CodingKey] = []
+    public var userInfo: [CodingUserInfoKey: Any] = [:]
+    public let rb: RecordBatch
+    public let nameToCol: [String: ArrowArrayHolder]
+    public let columns: [ArrowArrayHolder]
+    public init(_ decoder: ArrowDecoder) {
+        self.userInfo = decoder.userInfo
+        self.codingPath = decoder.codingPath
+        self.rb = decoder.rb
+        self.columns = decoder.columns
+        self.nameToCol = decoder.nameToCol
+        self.rbIndex = decoder.rbIndex
+    }
+
+    public init(_ rb: RecordBatch) {
+        self.rb = rb
+        var colMapping = [String: ArrowArrayHolder]()
+        var columns = [ArrowArrayHolder]()
+        for index in 0..<rb.schema.fields.count {
+            let field = rb.schema.fields[index]
+            columns.append(rb.column(index))
+            colMapping[field.name] = rb.column(index)
+        }
+
+        self.nameToCol = colMapping
+        self.columns = columns
+    }
+
+    public func decode<T: Decodable>(_ type: T.Type) throws -> [T] {
+        var output = [T]()
+        for index in 0..<rb.length {
+            self.rbIndex = index
+            output.append(try type.init(from: self))
+        }
+
+        return output
+    }
+
+    public func container<Key>(
+        keyedBy type: Key.Type
+    ) -> KeyedDecodingContainer<Key> where Key: CodingKey {
+        let container = ArrowKeyedDecoding<Key>(self, codingPath: codingPath)
+        return KeyedDecodingContainer(container)
+    }
+
+    public func unkeyedContainer() -> UnkeyedDecodingContainer {
+        return ArrowUnkeyedDecoding(self, codingPath: codingPath)
+    }
+
+    public func singleValueContainer() -> SingleValueDecodingContainer {
+        return ArrowSingleValueDecoding(self, codingPath: codingPath)
+    }
+
+    func getCol(_ name: String) throws -> AnyArray {
+        guard let col = self.nameToCol[name] else {
+            throw ArrowError.invalid("Column for key \"\(name)\" not found")
+        }
+
+        guard let anyArray = col.array as? AnyArray else {
+            throw ArrowError.invalid("Unable to convert array to AnyArray")
+        }
+
+        return anyArray
+    }
+
+    func getCol(_ index: Int) throws -> AnyArray {
+        if index >= self.columns.count {
+            throw ArrowError.outOfBounds(index: Int64(index))
+        }
+
+        guard let anyArray = self.columns[index].array as? AnyArray else {
+            throw ArrowError.invalid("Unable to convert array to AnyArray")
+        }
+
+        return anyArray
+    }
+
+    func doDecode<T>(_ key: CodingKey) throws -> T? {
+        let array: AnyArray = try self.getCol(key.stringValue)
+        return array.asAny(self.rbIndex) as? T
+    }
+
+    func doDecode<T>(_ col: Int) throws -> T? {
+        let array: AnyArray = try self.getCol(col)
+        return array.asAny(self.rbIndex) as? T
+    }
+
+    func isNull(_ key: CodingKey) throws -> Bool {
+        let array: AnyArray = try self.getCol(key.stringValue)
+        return array.asAny(self.rbIndex) == nil
+    }
+
+    func isNull(_ col: Int) throws -> Bool {
+        let array: AnyArray = try self.getCol(col)
+        return array.asAny(self.rbIndex) == nil
+    }
+}
+
+private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer {
+    var codingPath: [CodingKey]
+    var count: Int? = 0
+    var isAtEnd: Bool = false
+    var currentIndex: Int = 0
+    let decoder: ArrowDecoder
+
+    init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) {
+        self.decoder = decoder
+        self.codingPath = codingPath
+        self.count = self.decoder.columns.count
+    }
+
+    mutating func increment() {
+        self.currentIndex += 1
+        self.isAtEnd = self.currentIndex >= self.count!
+    }
+
+    mutating func decodeNil() throws -> Bool {
+        defer {increment()}
+        return try self.decoder.isNull(self.currentIndex)
+    }
+
+    mutating func decode<T>(_ type: T.Type) throws -> T where T: Decodable {
+        if type == Int8?.self || type == Int16?.self ||
+            type == Int32?.self || type == Int64?.self ||
+            type == UInt8?.self || type == UInt16?.self ||
+            type == UInt32?.self || type == UInt64?.self ||
+            type == String?.self || type == Double?.self ||
+            type == Float?.self || type == Date?.self ||
+            type == Int8.self || type == Int16.self ||
+            type == Int32.self || type == Int64.self ||
+            type == UInt8.self || type == UInt16.self ||
+            type == UInt32.self || type == UInt64.self ||
+            type == String.self || type == Double.self ||
+            type == Float.self || type == Date.self {
+            defer {increment()}
+            return try self.decoder.doDecode(self.currentIndex)!
+        } else {
+            throw ArrowError.invalid("Type \(type) is currently not supported")
+        }
+    }
+
+    func nestedContainer<NestedKey>(
+        keyedBy type: NestedKey.Type
+    ) throws -> KeyedDecodingContainer<NestedKey> where NestedKey: CodingKey {
+        throw ArrowError.invalid("Nested decoding is currently not supported.")
+    }
+
+    func nestedUnkeyedContainer() throws -> UnkeyedDecodingContainer {
+        throw ArrowError.invalid("Nested decoding is currently not supported.")
+    }
+
+    func superDecoder() throws -> Decoder {
+        throw ArrowError.invalid("super decoding is currently not supported.")
+    }
+}
+
+private struct ArrowKeyedDecoding<Key: CodingKey>: KeyedDecodingContainerProtocol {
+    var codingPath = [CodingKey]()
+    var allKeys = [Key]()
+    let decoder: ArrowDecoder
+
+    init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) {
+        self.decoder = decoder
+        self.codingPath = codingPath
+    }
+
+    func contains(_ key: Key) -> Bool {
+        return self.decoder.nameToCol.keys.contains(key.stringValue)
+    }
+
+    func decodeNil(forKey key: Key) throws -> Bool {
+        try self.decoder.isNull(key)
+    }
+
+    func decode(_ type: Bool.Type, forKey key: Key) throws -> Bool {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: String.Type, forKey key: Key) throws -> String {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: Double.Type, forKey key: Key) throws -> Double {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: Float.Type, forKey key: Key) throws -> Float {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: Int.Type, forKey key: Key) throws -> Int {
+        throw ArrowError.invalid(
+            "Int type is not supported (please use Int8, Int16, Int32 or Int64)")
+    }
+
+    func decode(_ type: Int8.Type, forKey key: Key) throws -> Int8 {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: Int16.Type, forKey key: Key) throws -> Int16 {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: Int32.Type, forKey key: Key) throws -> Int32 {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: Int64.Type, forKey key: Key) throws -> Int64 {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: UInt.Type, forKey key: Key) throws -> UInt {
+        throw ArrowError.invalid(
+            "UInt type is not supported (please use UInt8, UInt16, UInt32 or UInt64)")
+    }
+
+    func decode(_ type: UInt8.Type, forKey key: Key) throws -> UInt8 {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: UInt16.Type, forKey key: Key) throws -> UInt16 {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: UInt32.Type, forKey key: Key) throws -> UInt32 {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode(_ type: UInt64.Type, forKey key: Key) throws -> UInt64 {
+        return try self.decoder.doDecode(key)!
+    }
+
+    func decode<T>(_ type: T.Type, forKey key: Key) throws -> T where T: Decodable {
+        if type == Date.self {
+            return try self.decoder.doDecode(key)!
+        } else {
+            throw ArrowError.invalid("Type \(type) is currently not supported")
+        }
+    }
+
+    func nestedContainer<NestedKey>(
+        keyedBy type: NestedKey.Type,
+        forKey key: Key
+    ) throws -> KeyedDecodingContainer<NestedKey> where NestedKey: CodingKey {
+        throw ArrowError.invalid("Nested decoding is currently not supported.")
+    }
+
+    func nestedUnkeyedContainer(forKey key: Key) throws -> UnkeyedDecodingContainer {
+        throw ArrowError.invalid("Nested decoding is currently not supported.")
+    }
+
+    func superDecoder() throws -> Decoder {
+        throw ArrowError.invalid("super decoding is currently not supported.")
+    }
+
+    func superDecoder(forKey key: Key) throws -> Decoder {
+        throw ArrowError.invalid("super decoding is currently not supported.")
+    }
+}
+
+private struct ArrowSingleValueDecoding: SingleValueDecodingContainer {
+    var codingPath = [CodingKey]()
+    let decoder: ArrowDecoder
+
+    init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) {
+        self.decoder = decoder
+        self.codingPath = codingPath
+    }
+
+    func decodeNil() -> Bool {
+        do {
+            return try self.decoder.isNull(0)
+        } catch {
+            return false
+        }
+    }
+
+    func decode(_ type: Bool.Type) throws -> Bool {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: String.Type) throws -> String {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: Double.Type) throws -> Double {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: Float.Type) throws -> Float {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: Int.Type) throws -> Int {
+        throw ArrowError.invalid(
+            "Int type is not supported (please use Int8, Int16, Int32 or Int64)")
+    }
+
+    func decode(_ type: Int8.Type) throws -> Int8 {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: Int16.Type) throws -> Int16 {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: Int32.Type) throws -> Int32 {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: Int64.Type) throws -> Int64 {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: UInt.Type) throws -> UInt {
+        throw ArrowError.invalid(
+            "UInt type is not supported (please use UInt8, UInt16, UInt32 or UInt64)")
+    }
+
+    func decode(_ type: UInt8.Type) throws -> UInt8 {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: UInt16.Type) throws -> UInt16 {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: UInt32.Type) throws -> UInt32 {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode(_ type: UInt64.Type) throws -> UInt64 {
+        return try self.decoder.doDecode(0)!
+    }
+
+    func decode<T>(_ type: T.Type) throws -> T where T: Decodable {
+        if type == Int8.self || type == Int16.self ||
+            type == Int32.self || type == Int64.self ||
+            type == UInt8.self || type == UInt16.self ||
+            type == UInt32.self || type == UInt64.self ||
+            type == String.self || type == Double.self ||
+            type == Float.self || type == Date.self {
+            return try self.decoder.doDecode(0)!
+        } else {
+            throw ArrowError.invalid("Type \(type) is currently not supported")
+        }
+    }
+}
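
ArrowDecoder drives Swift's Codable machinery over a RecordBatch: the keyed container looks columns up by coding key, the unkeyed container walks columns positionally, and one value is decoded per row. A short usage sketch; the `Row` struct and its column names are illustrative and not part of this patch:

```swift
import Arrow

// Property names must match the batch's column names, and integer widths
// must be explicit: plain Int/UInt are rejected by the decoder above.
struct Row: Codable {
    let propInt8: Int8
    let propString: String
    let propDouble: Double?   // a nullable column decodes into an Optional
}

func rows(from rb: RecordBatch) throws -> [Row] {
    let decoder = ArrowDecoder(rb)
    return try decoder.decode(Row.self)   // one Row per record-batch row
}
```
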
diff --git a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift
index fb4a13b766f10..c701653ecb2c9 100644
--- a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift
@@ -23,7 +23,7 @@ private func makeBinaryHolder(_ buffers: [ArrowBuffer],
     do {
         let arrowType = ArrowType(ArrowType.ArrowBinary)
         let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
-        return .success(ArrowArrayHolder(BinaryArray(arrowData)))
+        return .success(ArrowArrayHolderImpl(BinaryArray(arrowData)))
     } catch let error as ArrowError {
         return .failure(error)
     } catch {
@@ -36,7 +36,7 @@ private func makeStringHolder(_ buffers: [ArrowBuffer],
     do {
         let arrowType = ArrowType(ArrowType.ArrowString)
         let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
-        return .success(ArrowArrayHolder(StringArray(arrowData)))
+        return .success(ArrowArrayHolderImpl(StringArray(arrowData)))
     } catch let error as ArrowError {
         return .failure(error)
     } catch {
@@ -51,11 +51,11 @@ private func makeDateHolder(_ field: ArrowField,
     do {
         if field.type.id == .date32 {
             let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount)
-            return .success(ArrowArrayHolder(Date32Array(arrowData)))
+            return .success(ArrowArrayHolderImpl(Date32Array(arrowData)))
         }
 
         let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount)
-        return .success(ArrowArrayHolder(Date64Array(arrowData)))
+        return .success(ArrowArrayHolderImpl(Date64Array(arrowData)))
     } catch let error as ArrowError {
         return .failure(error)
     } catch {
@@ -71,7 +71,7 @@ private func makeTimeHolder(_ field: ArrowField,
         if field.type.id == .time32 {
             if let arrowType = field.type as? ArrowTypeTime32 {
                 let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
-                return .success(ArrowArrayHolder(FixedArray<Time32>(arrowData)))
+                return .success(ArrowArrayHolderImpl(FixedArray<Time32>(arrowData)))
             } else {
                 return .failure(.invalid("Incorrect field type for time: \(field.type)"))
             }
@@ -79,7 +79,7 @@ private func makeTimeHolder(_ field: ArrowField,
 
         if let arrowType = field.type as? ArrowTypeTime64 {
             let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
-            return .success(ArrowArrayHolder(FixedArray<Time64>(arrowData)))
+            return .success(ArrowArrayHolderImpl(FixedArray<Time64>(arrowData)))
         } else {
             return .failure(.invalid("Incorrect field type for time: \(field.type)"))
         }
@@ -95,7 +95,7 @@ private func makeBoolHolder(_ buffers: [ArrowBuffer],
     do {
         let arrowType = ArrowType(ArrowType.ArrowBool)
         let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
-        return .success(ArrowArrayHolder(BoolArray(arrowData)))
+        return .success(ArrowArrayHolderImpl(BoolArray(arrowData)))
     } catch let error as ArrowError {
         return .failure(error)
     } catch {
@@ -109,7 +109,7 @@ private func makeFixedHolder<T>(
 ) -> Result<ArrowArrayHolder, ArrowError> {
     do {
         let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount)
-        return .success(ArrowArrayHolder(FixedArray<T>(arrowData)))
+        return .success(ArrowArrayHolderImpl(FixedArray<T>(arrowData)))
     } catch let error as ArrowError {
         return .failure(error)
     } catch {
diff --git a/swift/Arrow/Sources/Arrow/ArrowSchema.swift b/swift/Arrow/Sources/Arrow/ArrowSchema.swift
index 45f13a1551c3d..65c506d51cdd6 100644
--- a/swift/Arrow/Sources/Arrow/ArrowSchema.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowSchema.swift
@@ -17,9 +17,9 @@
 import Foundation
 
 public class ArrowField {
-    let type: ArrowType
-    let name: String
-    let isNullable: Bool
+    public let type: ArrowType
+    public let name: String
+    public let isNullable: Bool
 
     init(_ name: String, type: ArrowType, isNullable: Bool) {
         self.name = name
diff --git a/swift/Arrow/Sources/Arrow/ArrowTable.swift b/swift/Arrow/Sources/Arrow/ArrowTable.swift
index 7677fb4f33a19..b9d15154c4f94 100644
--- a/swift/Arrow/Sources/Arrow/ArrowTable.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowTable.swift
@@ -64,7 +64,7 @@ public class ArrowTable {
         let builder = ArrowTable.Builder()
         for index in 0..<columns[0].length {
 
+    public static func makeArrowColumn(_ field: ArrowField,
+                                       holders: [ArrowArrayHolder]
+    ) -> Result<ArrowColumn, ArrowError> {
+        do {
+            return .success(try holders[0].getArrowColumn(field, holders))
+        } catch {
+            return .failure(.runtimeError("\(error)"))
+        }
+    }
+
     public class Builder {
         let schemaBuilder = ArrowSchema.Builder()
         var columns = [ArrowColumn]()
@@ -172,6 +183,11 @@ public class RecordBatch {
         return (arrayHolder.array as! ArrowArray<T>) // swiftlint:disable:this force_cast
     }
 
+    public func anyData(for columnIndex: Int) -> AnyArray {
+        let arrayHolder = column(columnIndex)
+        return (arrayHolder.array as! AnyArray) // swiftlint:disable:this force_cast
+    }
+
     public func column(_ index: Int) -> ArrowArrayHolder {
         return self.columns[index]
     }
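
With ArrowField's members now public and the new type-erased anyData(for:) accessor on RecordBatch, a caller can walk a batch generically without knowing concrete array types up front. A minimal sketch (the dumpBatch helper is hypothetical, not part of the patch):

```swift
import Arrow

// Walk every column of a RecordBatch without static types,
// using the type-erased AnyArray accessor added above.
func dumpBatch(_ rb: RecordBatch) {
    for colIndex in 0..<rb.columnCount {
        let field = rb.schema.fields[colIndex]
        let column = rb.anyData(for: colIndex)   // AnyArray
        print("\(field.name) (\(field.type.id)), nullable: \(field.isNullable)")
        for rowIndex in 0..<column.length {
            print(column.asAny(rowIndex) ?? "null")
        }
    }
}
```
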
diff --git a/swift/Arrow/Sources/Arrow/ArrowType.swift b/swift/Arrow/Sources/Arrow/ArrowType.swift
index f5a869f7cdaff..e1ada4b9734ea 100644
--- a/swift/Arrow/Sources/Arrow/ArrowType.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowType.swift
@@ -90,6 +90,17 @@ public class ArrowTypeTime32: ArrowType {
         self.unit = unit
         super.init(ArrowType.ArrowTime32)
     }
+
+    public override var cDataFormatId: String {
+        get throws {
+            switch self.unit {
+            case .milliseconds:
+                return "ttm"
+            case .seconds:
+                return "tts"
+            }
+        }
+    }
 }
 
 public class ArrowTypeTime64: ArrowType {
@@ -98,6 +109,17 @@ public class ArrowTypeTime64: ArrowType {
         self.unit = unit
         super.init(ArrowType.ArrowTime64)
     }
+
+    public override var cDataFormatId: String {
+        get throws {
+            switch self.unit {
+            case .microseconds:
+                return "ttu"
+            case .nanoseconds:
+                return "ttn"
+            }
+        }
+    }
 }
 
 public class ArrowType {
@@ -209,6 +231,100 @@ public class ArrowType {
             fatalError("Stride requested for unknown type: \(self)")
         }
     }
+
+    public var cDataFormatId: String {
+        get throws {
+            switch self.id {
+            case ArrowTypeId.int8:
+                return "c"
+            case ArrowTypeId.int16:
+                return "s"
+            case ArrowTypeId.int32:
+                return "i"
+            case ArrowTypeId.int64:
+                return "l"
+            case ArrowTypeId.uint8:
+                return "C"
+            case ArrowTypeId.uint16:
+                return "S"
+            case ArrowTypeId.uint32:
+                return "I"
+            case ArrowTypeId.uint64:
+                return "L"
+            case ArrowTypeId.float:
+                return "f"
+            case ArrowTypeId.double:
+                return "g"
+            case ArrowTypeId.boolean:
+                return "b"
+            case ArrowTypeId.date32:
+                return "tdD"
+            case ArrowTypeId.date64:
+                return "tdm"
+            case ArrowTypeId.time32:
+                if let time32 = self as? ArrowTypeTime32 {
+                    return try time32.cDataFormatId
+                }
+                return "tts"
+            case ArrowTypeId.time64:
+                if let time64 = self as? ArrowTypeTime64 {
+                    return try time64.cDataFormatId
+                }
+                return "ttu"
+            case ArrowTypeId.binary:
+                return "z"
+            case ArrowTypeId.string:
+                return "u"
+            default:
+                throw ArrowError.notImplemented
+            }
+        }
+    }
+
+    public static func fromCDataFormatId( // swiftlint:disable:this cyclomatic_complexity
+        _ from: String) throws -> ArrowType {
+        if from == "c" {
+            return ArrowType(ArrowType.ArrowInt8)
+        } else if from == "s" {
+            return ArrowType(ArrowType.ArrowInt16)
+        } else if from == "i" {
+            return ArrowType(ArrowType.ArrowInt32)
+        } else if from == "l" {
+            return ArrowType(ArrowType.ArrowInt64)
+        } else if from == "C" {
+            return ArrowType(ArrowType.ArrowUInt8)
+        } else if from == "S" {
+            return ArrowType(ArrowType.ArrowUInt16)
+        } else if from == "I" {
+            return ArrowType(ArrowType.ArrowUInt32)
+        } else if from == "L" {
+            return ArrowType(ArrowType.ArrowUInt64)
+        } else if from == "f" {
+            return ArrowType(ArrowType.ArrowFloat)
+        } else if from == "g" {
+            return ArrowType(ArrowType.ArrowDouble)
+        } else if from == "b" {
+            return ArrowType(ArrowType.ArrowBool)
+        } else if from == "tdD" {
+            return ArrowType(ArrowType.ArrowDate32)
+        } else if from == "tdm" {
+            return ArrowType(ArrowType.ArrowDate64)
+        } else if from == "tts" {
+            return ArrowTypeTime32(.seconds)
+        } else if from == "ttm" {
+            return ArrowTypeTime32(.milliseconds)
+        } else if from == "ttu" {
+            return ArrowTypeTime64(.microseconds)
+        } else if from == "ttn" {
+            return ArrowTypeTime64(.nanoseconds)
+        } else if from == "z" {
+            return ArrowType(ArrowType.ArrowBinary)
+        } else if from == "u" {
+            return ArrowType(ArrowType.ArrowString)
+        }
+
+        throw ArrowError.notImplemented
+    }
 }
 
 extension ArrowType.Info: Equatable {
diff --git a/swift/Arrow/Sources/Arrow/ChunkedArray.swift b/swift/Arrow/Sources/Arrow/ChunkedArray.swift
index 3a06aa46550df..c5ccfe4aec0e6 100644
--- a/swift/Arrow/Sources/Arrow/ChunkedArray.swift
+++ b/swift/Arrow/Sources/Arrow/ChunkedArray.swift
@@ -17,6 +17,11 @@
 
 import Foundation
 
+public protocol AnyArray {
+    func asAny(_ index: UInt) -> Any?
+    var length: UInt {get}
+}
+
 public protocol AsString {
     func asString(_ index: UInt) -> String
 }
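
These cDataFormatId strings are the standard Arrow C data interface format codes ("c" for int8, "u" for utf8, "ttm" for time32 in milliseconds, and so on), and fromCDataFormatId is the inverse mapping. A quick round-trip sketch under those definitions:

```swift
import Arrow

// Round-trip a type through its C data interface format string.
// "ttm" identifies time32 with millisecond resolution.
do {
    let millis = ArrowTypeTime32(.milliseconds)
    let formatId = try millis.cDataFormatId          // "ttm"
    let roundTripped = try ArrowType.fromCDataFormatId(formatId)
    assert(roundTripped.id == .time32)
} catch {
    print("format id mapping failed: \(error)")
}
```
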
diff --git a/swift/Arrow/Sources/ArrowC/ArrowCData.c b/swift/Arrow/Sources/ArrowC/ArrowCData.c
new file mode 100644
index 0000000000000..fe0f80899719b
--- /dev/null
+++ b/swift/Arrow/Sources/ArrowC/ArrowCData.c
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stddef.h>
+#include "include/ArrowCData.h"
+
+void ArrowSwiftClearReleaseSchema(struct ArrowSchema* arrowSchema) {
+    if(arrowSchema) {
+        arrowSchema->release = NULL;
+    }
+}
+
+void ArrowSwiftClearReleaseArray(struct ArrowArray* arrowArray) {
+    if(arrowArray) {
+        arrowArray->release = NULL;
+    }
+}
diff --git a/swift/Arrow/Sources/ArrowC/include/ArrowCData.h b/swift/Arrow/Sources/ArrowC/include/ArrowCData.h
new file mode 100644
index 0000000000000..4b2f35efcb961
--- /dev/null
+++ b/swift/Arrow/Sources/ArrowC/include/ArrowCData.h
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_C_DATA_INTERFACE
+#define ARROW_C_DATA_INTERFACE
+
+#define ARROW_FLAG_DICTIONARY_ORDERED 1
+#define ARROW_FLAG_NULLABLE 2
+#define ARROW_FLAG_MAP_KEYS_SORTED 4
+
+#include <stdint.h>  // For int64_t
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ArrowSchema {
+    // Array type description
+    const char* format;
+    const char* name;
+    const char* metadata;
+    int64_t flags;
+    int64_t n_children;
+    struct ArrowSchema** children;
+    struct ArrowSchema* dictionary;
+
+    // Release callback
+    void (*release)(struct ArrowSchema*);
+    // Opaque producer-specific data
+    void* private_data;
+};
+
+struct ArrowArray {
+    // Array data description
+    int64_t length;
+    int64_t null_count;
+    int64_t offset;
+    int64_t n_buffers;
+    int64_t n_children;
+    const void** buffers;
+    struct ArrowArray** children;
+    struct ArrowArray* dictionary;
+
+    // Release callback
+    void (*release)(struct ArrowArray*);
+    // Opaque producer-specific data
+    void* private_data;
+};
+
+// Not able to set the release on the schema
+// to NULL in Swift. nil in Swift is not
+// equivalent to NULL.
+void ArrowSwiftClearReleaseSchema(struct ArrowSchema*);
+
+// Not able to set the release on the array
+// to NULL in Swift. nil in Swift is not
+// equivalent to NULL.
+void ArrowSwiftClearReleaseArray(struct ArrowArray*);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // ARROW_C_DATA_INTERFACE
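
The C data interface contract requires a consumer to mark a structure as consumed by setting its release callback to NULL, and Swift cannot assign nil to a C function-pointer field directly, hence the two C helpers above. A hedged sketch of how an importer might use them, assuming the ArrowC module exposes these declarations (as the test targets below do); the markConsumed helper is hypothetical:

```swift
import ArrowC

// Mark an imported ArrowArray as consumed: invoke the producer's
// release callback if one is set, then NULL it out in C.
func markConsumed(_ cArray: UnsafeMutablePointer<ArrowC.ArrowArray>) {
    if let release = cArray.pointee.release {
        release(cArray)                   // let the producer free its buffers
    }
    ArrowSwiftClearReleaseArray(cArray)   // defensively ensure release == NULL
}
```
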
diff --git a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift
index f5bfa0506e62f..ed0cb1148e871 100644
--- a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift
+++ b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift
@@ -18,7 +18,7 @@
 import XCTest
 @testable import Arrow
 
-final class ArrayTests: XCTestCase {
+final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length
     func testPrimitiveArray() throws {
         // This is an example of a functional test case.
         // Use XCTAssert and related functions to verify your tests produce the correct
@@ -245,4 +245,66 @@ final class ArrayTests: XCTestCase {
         try checkHolderForType(ArrowType(ArrowType.ArrowBool))
         try checkHolderForType(ArrowType(ArrowType.ArrowString))
     }
+
+    func testArrowArrayHolderBuilder() throws {
+        let uint8HBuilder: ArrowArrayHolderBuilder =
+            (try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<UInt8>)
+        for index in 0..<100 {
+            uint8HBuilder.appendAny(UInt8(index))
+        }
+
+        let uint8Holder = try uint8HBuilder.toHolder()
+        XCTAssertEqual(uint8Holder.nullCount, 0)
+        XCTAssertEqual(uint8Holder.length, 100)
+
+        let stringHBuilder: ArrowArrayHolderBuilder =
+            (try ArrowArrayBuilders.loadStringArrayBuilder())
+        for index in 0..<100 {
+            if index % 10 == 9 {
+                stringHBuilder.appendAny(nil)
+            } else {
+                stringHBuilder.appendAny("test" + String(index))
+            }
+        }
+
+        let stringHolder = try stringHBuilder.toHolder()
+        XCTAssertEqual(stringHolder.nullCount, 10)
+        XCTAssertEqual(stringHolder.length, 100)
+    }
+
+    func testAddVArgs() throws {
+        let arrayBuilder: NumberArrayBuilder<Int32> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        arrayBuilder.append(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
+        XCTAssertEqual(arrayBuilder.length, 10)
+        XCTAssertEqual(try arrayBuilder.finish()[2], 2)
+        let doubleBuilder: NumberArrayBuilder<Double> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        doubleBuilder.append(0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8)
+        XCTAssertEqual(doubleBuilder.length, 9)
+        XCTAssertEqual(try doubleBuilder.finish()[4], 4.4)
+        let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
+        stringBuilder.append("0", "1", "2", "3", "4", "5", "6")
+        XCTAssertEqual(stringBuilder.length, 7)
+        XCTAssertEqual(try stringBuilder.finish()[4], "4")
+        let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder()
+        boolBuilder.append(true, false, true, false)
+        XCTAssertEqual(try boolBuilder.finish()[2], true)
+    }
+
+    func testAddArray() throws {
+        let arrayBuilder: NumberArrayBuilder<Int32> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        arrayBuilder.append([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+        XCTAssertEqual(arrayBuilder.length, 10)
+        XCTAssertEqual(try arrayBuilder.finish()[2], 2)
+        let doubleBuilder: NumberArrayBuilder<Double> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        doubleBuilder.append([0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8])
+        XCTAssertEqual(doubleBuilder.length, 9)
+        XCTAssertEqual(try doubleBuilder.finish()[4], 4.4)
+        let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
+        stringBuilder.append(["0", "1", "2", "3", "4", "5", "6"])
+        XCTAssertEqual(stringBuilder.length, 7)
+        XCTAssertEqual(try stringBuilder.finish()[4], "4")
+        let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder()
+        boolBuilder.append([true, false, true, false])
+        XCTAssertEqual(try boolBuilder.finish()[2], true)
+    }
 }
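
The type-erased ArrowArrayHolderBuilder interface exercised in testArrowArrayHolderBuilder is what lets differently typed builders live in a single collection and feed a RecordBatch uniformly. A short sketch of that pattern (buildBatch and its column values are illustrative only):

```swift
import Arrow

// Assemble a RecordBatch from heterogeneously typed builders held
// behind the shared, type-erased ArrowArrayHolderBuilder interface.
func buildBatch() throws -> Result<RecordBatch, ArrowError> {
    let builders: [(String, ArrowArrayHolderBuilder)] = [
        ("ids", try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<UInt8>),
        ("names", try ArrowArrayBuilders.loadStringArrayBuilder())
    ]
    builders[0].1.appendAny(UInt8(1))   // appendAny takes Any?, checked at runtime
    builders[1].1.appendAny("one")

    var rbBuilder = RecordBatch.Builder()
    for (name, builder) in builders {
        rbBuilder = rbBuilder.addColumn(name, arrowArray: try builder.toHolder())
    }
    return rbBuilder.finish()
}
```
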
diff --git a/swift/Arrow/Tests/ArrowTests/CDataTests.swift b/swift/Arrow/Tests/ArrowTests/CDataTests.swift
new file mode 100644
index 0000000000000..2344b234745a2
--- /dev/null
+++ b/swift/Arrow/Tests/ArrowTests/CDataTests.swift
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import XCTest
+@testable import Arrow
+import ArrowC
+
+final class CDataTests: XCTestCase {
+    func makeSchema() -> Arrow.ArrowSchema {
+        let schemaBuilder = ArrowSchema.Builder()
+        return schemaBuilder
+            .addField("colBool", type: ArrowType(ArrowType.ArrowBool), isNullable: false)
+            .addField("colUInt8", type: ArrowType(ArrowType.ArrowUInt8), isNullable: true)
+            .addField("colUInt16", type: ArrowType(ArrowType.ArrowUInt16), isNullable: true)
+            .addField("colUInt32", type: ArrowType(ArrowType.ArrowUInt32), isNullable: true)
+            .addField("colUInt64", type: ArrowType(ArrowType.ArrowUInt64), isNullable: true)
+            .addField("colInt8", type: ArrowType(ArrowType.ArrowInt8), isNullable: false)
+            .addField("colInt16", type: ArrowType(ArrowType.ArrowInt16), isNullable: false)
+            .addField("colInt32", type: ArrowType(ArrowType.ArrowInt32), isNullable: false)
+            .addField("colInt64", type: ArrowType(ArrowType.ArrowInt64), isNullable: false)
+            .addField("colString", type: ArrowType(ArrowType.ArrowString), isNullable: false)
+            .addField("colBinary", type: ArrowType(ArrowType.ArrowBinary), isNullable: false)
+            .addField("colDate32", type: ArrowType(ArrowType.ArrowDate32), isNullable: false)
+            .addField("colDate64", type: ArrowType(ArrowType.ArrowDate64), isNullable: false)
+            .addField("colTime32", type: ArrowType(ArrowType.ArrowTime32), isNullable: false)
+            .addField("colTime32s", type: ArrowTypeTime32(.seconds), isNullable: false)
+            .addField("colTime32m", type: ArrowTypeTime32(.milliseconds), isNullable: false)
+            .addField("colTime64", type: ArrowType(ArrowType.ArrowTime64), isNullable: false)
+            .addField("colTime64u", type: ArrowTypeTime64(.microseconds), isNullable: false)
+            .addField("colTime64n", type: ArrowTypeTime64(.nanoseconds), isNullable: false)
+            .addField("colTime64", type: ArrowType(ArrowType.ArrowTime64), isNullable: false)
+            .addField("colFloat", type: ArrowType(ArrowType.ArrowFloat), isNullable: false)
+            .addField("colDouble", type: ArrowType(ArrowType.ArrowDouble), isNullable: false)
+            .finish()
+    }
+
+    func checkImportField(_ cSchema: ArrowC.ArrowSchema, name: String, type: ArrowType.Info) throws {
+        let importer = ArrowCImporter()
+        switch importer.importField(cSchema) {
+        case .success(let arrowField):
+            XCTAssertEqual(arrowField.type.info, type)
+            XCTAssertEqual(arrowField.name, name)
+        case .failure(let error):
+            throw error
+        }
+    }
+
+    func testImportExportSchema() throws {
+        let schema = makeSchema()
+        let exporter = ArrowCExporter()
+        for arrowField in schema.fields {
+            var cSchema = ArrowC.ArrowSchema()
+            switch exporter.exportField(&cSchema, field: arrowField) {
+            case .success:
+                try checkImportField(cSchema, name: arrowField.name, type: arrowField.type.info)
+            case .failure(let error):
+                throw error
+            }
+        }
+    }
+
+    func testImportExportArray() throws {
+        let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
+        for index in 0..<100 {
+            if index % 10 == 9 {
+                stringBuilder.append(nil)
+            } else {
+                stringBuilder.append("test" + String(index))
+            }
+        }
+
+        XCTAssertEqual(stringBuilder.nullCount, 10)
+        XCTAssertEqual(stringBuilder.length, 100)
+        XCTAssertEqual(stringBuilder.capacity, 648)
+        let stringArray = try stringBuilder.finish()
+        let exporter = ArrowCExporter()
+        var cArray = ArrowC.ArrowArray()
+        exporter.exportArray(&cArray, arrowData: stringArray.arrowData)
+        let cArrayMutPtr = UnsafeMutablePointer<ArrowC.ArrowArray>.allocate(capacity: 1)
+        cArrayMutPtr.pointee = cArray
+        defer {
+            cArrayMutPtr.deallocate()
+        }
+
+        let importer = ArrowCImporter()
+        switch importer.importArray(UnsafePointer(cArrayMutPtr), arrowType: ArrowType(ArrowType.ArrowString)) {
+        case .success(let holder):
+            let builder = RecordBatch.Builder()
+            switch builder
+                .addColumn("test", arrowArray: holder)
+                .finish() {
+            case .success(let rb):
+                XCTAssertEqual(rb.columnCount, 1)
+                XCTAssertEqual(rb.length, 100)
+                let col1: Arrow.ArrowArray<String> = rb.data(for: 0)
+                for index in 0..<col1.length {
+                    if index % 10 == 9 {
+                        XCTAssertEqual(col1[index], nil)
+                    } else {
+                        XCTAssertEqual(col1[index], "test\(index)")
+                    }
+                }
+            case .failure(let error):
+                throw error
+            }
+        case .failure(let error):
+            throw error
+        }
+    }
+}
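
The pointer dance in testImportExportArray is the core pattern for handing a Swift-built array to any C data interface consumer: export into a stack value, copy it to stable heap storage, and pass out a pointer whose lifetime you control. Condensed into a helper under the same assumptions as the test (the exportToC name is hypothetical):

```swift
import Arrow
import ArrowC

// Export a finished StringArray into a heap-allocated C struct that
// a foreign consumer (Go, C++, ...) can hold on to.
func exportToC(_ array: StringArray) -> UnsafeMutablePointer<ArrowC.ArrowArray> {
    var cArray = ArrowC.ArrowArray()
    ArrowCExporter().exportArray(&cArray, arrowData: array.arrowData)
    let ptr = UnsafeMutablePointer<ArrowC.ArrowArray>.allocate(capacity: 1)
    ptr.pointee = cArray
    return ptr  // caller must eventually release and deallocate
}
```
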
diff --git a/swift/Arrow/Tests/ArrowTests/DecoderTests.swift b/swift/Arrow/Tests/ArrowTests/DecoderTests.swift
new file mode 100644
--- /dev/null
+++ b/swift/Arrow/Tests/ArrowTests/DecoderTests.swift
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+import XCTest
+@testable import Arrow
+
+public class TestClass: Codable {
+    public var propBool: Bool = false
+    public var propInt8: Int8 = 0
+    public var propInt16: Int16 = 0
+    public var propInt32: Int32 = 0
+    public var propInt64: Int64 = 0
+    public var propUInt8: UInt8 = 0
+    public var propUInt16: UInt16 = 0
+    public var propUInt32: UInt32 = 0
+    public var propUInt64: UInt64 = 0
+    public var propFloat: Float = 0
+    public var propDouble: Double?
+    public var propString: String = ""
+    public var propDate: Date = Date()
+}
+
+final class DecoderTests: XCTestCase {
+    func testArrowKeyedDecoder() throws { // swiftlint:disable:this function_body_length
+        let date1 = Date()
+        let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder()
+        let int8Builder: NumberArrayBuilder<Int8> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let int16Builder: NumberArrayBuilder<Int16> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let int32Builder: NumberArrayBuilder<Int32> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let int64Builder: NumberArrayBuilder<Int64> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let uint8Builder: NumberArrayBuilder<UInt8> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let uint16Builder: NumberArrayBuilder<UInt16> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let uint32Builder: NumberArrayBuilder<UInt32> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let uint64Builder: NumberArrayBuilder<UInt64> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let floatBuilder: NumberArrayBuilder<Float> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let doubleBuilder: NumberArrayBuilder<Double> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
+        let dateBuilder = try ArrowArrayBuilders.loadDate64ArrayBuilder()
+
+        boolBuilder.append(false, true, false)
+        int8Builder.append(10, 11, 12)
+        int16Builder.append(20, 21, 22)
+        int32Builder.append(30, 31, 32)
+        int64Builder.append(40, 41, 42)
+        uint8Builder.append(50, 51, 52)
+        uint16Builder.append(60, 61, 62)
+        uint32Builder.append(70, 71, 72)
+        uint64Builder.append(80, 81, 82)
+        floatBuilder.append(90.1, 91.1, 92.1)
+        doubleBuilder.append(101.1, nil, nil)
+        stringBuilder.append("test0", "test1", "test2")
+        dateBuilder.append(date1, date1, date1)
+        let result = RecordBatch.Builder()
+            .addColumn("propBool", arrowArray: try boolBuilder.toHolder())
+            .addColumn("propInt8", arrowArray: try int8Builder.toHolder())
+            .addColumn("propInt16", arrowArray: try int16Builder.toHolder())
+            .addColumn("propInt32", arrowArray: try int32Builder.toHolder())
+            .addColumn("propInt64", arrowArray: try int64Builder.toHolder())
+            .addColumn("propUInt8", arrowArray: try uint8Builder.toHolder())
+            .addColumn("propUInt16", arrowArray: try uint16Builder.toHolder())
+            .addColumn("propUInt32", arrowArray: try uint32Builder.toHolder())
+            .addColumn("propUInt64", arrowArray: try uint64Builder.toHolder())
+            .addColumn("propFloat", arrowArray: try floatBuilder.toHolder())
+            .addColumn("propDouble", arrowArray: try doubleBuilder.toHolder())
+            .addColumn("propString", arrowArray: try stringBuilder.toHolder())
+            .addColumn("propDate", arrowArray: try dateBuilder.toHolder())
+            .finish()
+        switch result {
+        case .success(let rb):
+            let decoder = ArrowDecoder(rb)
+            var testClasses = try decoder.decode(TestClass.self)
+            for index in 0..<testClasses.count {
+                let testClass = testClasses[index]
+                XCTAssertEqual(testClass.propBool, index == 1)
+                XCTAssertEqual(testClass.propInt8, Int8(index + 10))
+                XCTAssertEqual(testClass.propDouble, index == 0 ? 101.1 : nil)
+                XCTAssertEqual(testClass.propString, "test\(index)")
+            }
+        case .failure(let err):
+            throw err
+        }
+    }
+
+    func testArrowSingleValueDecoder() throws {
+        let int8Builder: NumberArrayBuilder<Int8> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        int8Builder.append(10, 11, 12)
+        let result = RecordBatch.Builder()
+            .addColumn("propInt8", arrowArray: try int8Builder.toHolder())
+            .finish()
+        switch result {
+        case .success(let rb):
+            let decoder = ArrowDecoder(rb)
+            let testData = try decoder.decode(Int8?.self)
+            for index in 0..<testData.count {
+                XCTAssertEqual(testData[index], Int8(index + 10))
+            }
+        case .failure(let err):
+            throw err
+        }
+
+        let int8WNilBuilder: NumberArrayBuilder<Int8> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        int8WNilBuilder.append(10, nil, 12, nil)
+        let resultWNil = RecordBatch.Builder()
+            .addColumn("propInt8", arrowArray: try int8WNilBuilder.toHolder())
+            .finish()
+        switch resultWNil {
+        case .success(let rb):
+            let decoder = ArrowDecoder(rb)
+            let testData = try decoder.decode(Int8?.self)
+            for index in 0..<testData.count {
+                if index % 2 == 1 {
+                    XCTAssertNil(testData[index])
+                } else {
+                    XCTAssertEqual(testData[index], Int8(index + 10))
+                }
+            }
+        case .failure(let err):
+            throw err
+        }
+    }
+
+    func testArrowUnkeyedDecoder() throws {
+        let int8Builder: NumberArrayBuilder<Int8> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
+        int8Builder.append(10, 11, 12, 13)
+        stringBuilder.append("test0", "test1", "test2", "test3")
+        let result = RecordBatch.Builder()
+            .addColumn("propInt8", arrowArray: try int8Builder.toHolder())
+            .addColumn("propString", arrowArray: try stringBuilder.toHolder())
+            .finish()
+        switch result {
+        case .success(let rb):
+            let decoder = ArrowDecoder(rb)
+            let testData = try decoder.decode([Int8: String].self)
+            var index: Int8 = 0
+            for data in testData {
+                let str = data[10 + index]
+                XCTAssertEqual(str, "test\(index)")
+                index += 1
+            }
+        case .failure(let err):
+            throw err
+        }
+    }
+
+    func testArrowUnkeyedDecoderWithNull() throws {
+        let int8Builder: NumberArrayBuilder<Int8> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let stringWNilBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
+        int8Builder.append(10, 11, 12, 13)
+        stringWNilBuilder.append(nil, "test1", nil, "test3")
+        let resultWNil = RecordBatch.Builder()
+            .addColumn("propInt8", arrowArray: try int8Builder.toHolder())
+            .addColumn("propString", arrowArray: try stringWNilBuilder.toHolder())
+            .finish()
+        switch resultWNil {
+        case .success(let rb):
+            let decoder = ArrowDecoder(rb)
+            let testData = try decoder.decode([Int8: String?].self)
+            var index: Int8 = 0
+            for data in testData {
+                let str = data[10 + index]
+                if index % 2 == 0 {
+                    XCTAssertNil(str!)
+                } else {
+                    XCTAssertEqual(str, "test\(index)")
+                }
+                index += 1
+            }
+        case .failure(let err):
+            throw err
+        }
+
+    }
+}
diff --git a/swift/Arrow/Tests/ArrowTests/IPCTests.swift b/swift/Arrow/Tests/ArrowTests/IPCTests.swift
index 103c3b24c7b93..311a3bb235b78 100644
--- a/swift/Arrow/Tests/ArrowTests/IPCTests.swift
+++ b/swift/Arrow/Tests/ArrowTests/IPCTests.swift
@@ -98,11 +98,11 @@ func makeRecordBatch() throws -> RecordBatch {
     floatBuilder.append(433.334)
     floatBuilder.append(544.445)
 
-    let uint8Holder = ArrowArrayHolder(try uint8Builder.finish())
-    let stringHolder = ArrowArrayHolder(try stringBuilder.finish())
-    let date32Holder = ArrowArrayHolder(try date32Builder.finish())
-    let int32Holder = ArrowArrayHolder(try int32Builder.finish())
-    let floatHolder = ArrowArrayHolder(try floatBuilder.finish())
+    let uint8Holder = ArrowArrayHolderImpl(try uint8Builder.finish())
+    let stringHolder = ArrowArrayHolderImpl(try stringBuilder.finish())
+    let date32Holder = ArrowArrayHolderImpl(try date32Builder.finish())
+    let int32Holder = ArrowArrayHolderImpl(try int32Builder.finish())
+    let floatHolder = ArrowArrayHolderImpl(try floatBuilder.finish())
     let result = RecordBatch.Builder()
         .addColumn("col1", arrowArray: uint8Holder)
         .addColumn("col2", arrowArray: stringHolder)
@@ -279,7 +279,7 @@ final class IPCFileReaderTests: XCTestCase {
         binaryBuilder.append("test33".data(using: .utf8))
         binaryBuilder.append("test44".data(using: .utf8))
 
-        let binaryHolder = ArrowArrayHolder(try binaryBuilder.finish())
+        let binaryHolder = ArrowArrayHolderImpl(try binaryBuilder.finish())
         let result = RecordBatch.Builder()
             .addColumn("binary", arrowArray: binaryHolder)
             .finish()
@@ -307,8 +307,8 @@ final class IPCFileReaderTests: XCTestCase {
         time32Builder.append(2)
         time32Builder.append(nil)
         time32Builder.append(3)
-        let time64Holder = ArrowArrayHolder(try time64Builder.finish())
-        let time32Holder = ArrowArrayHolder(try time32Builder.finish())
+        let time64Holder = ArrowArrayHolderImpl(try time64Builder.finish())
+        let time32Holder = ArrowArrayHolderImpl(try time32Builder.finish())
         let result = RecordBatch.Builder()
             .addColumn("time64", arrowArray: time64Holder)
             .addColumn("time32", arrowArray: time32Holder)
diff --git a/swift/Arrow/Tests/ArrowTests/RecordBatchTests.swift b/swift/Arrow/Tests/ArrowTests/RecordBatchTests.swift
index 8820f1cdb1a91..9961781f30833 100644
--- a/swift/Arrow/Tests/ArrowTests/RecordBatchTests.swift
+++ b/swift/Arrow/Tests/ArrowTests/RecordBatchTests.swift
@@ -29,8 +29,8 @@ final class RecordBatchTests: XCTestCase {
         stringBuilder.append("test22")
         stringBuilder.append("test33")
 
-        let intHolder = ArrowArrayHolder(try uint8Builder.finish())
-        let stringHolder = ArrowArrayHolder(try stringBuilder.finish())
+        let intHolder = ArrowArrayHolderImpl(try uint8Builder.finish())
+        let stringHolder = ArrowArrayHolderImpl(try stringBuilder.finish())
         let result = RecordBatch.Builder()
             .addColumn("col1", arrowArray: intHolder)
             .addColumn("col2", arrowArray: stringHolder)
diff --git a/swift/Arrow/Tests/ArrowTests/TableTests.swift b/swift/Arrow/Tests/ArrowTests/TableTests.swift
index a82a07979345c..8e958ccbf9f9f 100644
--- a/swift/Arrow/Tests/ArrowTests/TableTests.swift
+++ b/swift/Arrow/Tests/ArrowTests/TableTests.swift
@@ -132,8 +132,8 @@ final class TableTests: XCTestCase {
         let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
         stringBuilder.append("test10")
         stringBuilder.append("test22")
-        let intHolder = ArrowArrayHolder(try uint8Builder.finish())
-        let stringHolder = ArrowArrayHolder(try stringBuilder.finish())
+        let intHolder = ArrowArrayHolderImpl(try uint8Builder.finish())
+        let stringHolder = ArrowArrayHolderImpl(try stringBuilder.finish())
         let result = RecordBatch.Builder()
             .addColumn("col1", arrowArray: intHolder)
             .addColumn("col2", arrowArray: stringHolder)
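
These test updates are all the same mechanical change, which suggests ArrowArrayHolder has become a protocol with ArrowArrayHolderImpl as its concrete wrapper: every place that previously constructed an ArrowArrayHolder now constructs an ArrowArrayHolderImpl. The pattern in isolation (column name and values are illustrative):

```swift
import Arrow

// ArrowArrayHolderImpl wraps a concrete ArrowArray behind the
// ArrowArrayHolder interface that RecordBatch.Builder expects.
func singleColumnBatch() throws -> Result<RecordBatch, ArrowError> {
    let uint8Builder: NumberArrayBuilder<UInt8> = try ArrowArrayBuilders.loadNumberArrayBuilder()
    uint8Builder.append(10, 22, 33)
    let holder: ArrowArrayHolder = ArrowArrayHolderImpl(try uint8Builder.finish())
    return RecordBatch.Builder()
        .addColumn("col1", arrowArray: holder)
        .finish()
}
```
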
diff --git a/swift/ArrowFlight/Package.swift b/swift/ArrowFlight/Package.swift
index f3caa83486764..629b830a6e0da 100644
--- a/swift/ArrowFlight/Package.swift
+++ b/swift/ArrowFlight/Package.swift
@@ -29,7 +29,7 @@ let package = Package(
         // Products define the executables and libraries a package produces, making them visible to other packages.
         .library(
             name: "ArrowFlight",
-            targets: ["ArrowFlight"]),
+            targets: ["ArrowFlight"])
     ],
     dependencies: [
         .package(url: "https://github.com/grpc/grpc-swift.git", from: "1.15.0"),
@@ -48,6 +48,6 @@ let package = Package(
             ]),
         .testTarget(
             name: "ArrowFlightTests",
-            dependencies: ["ArrowFlight"]),
+            dependencies: ["ArrowFlight"])
     ]
 )
diff --git a/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift b/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift
index 8097388c7fde1..f7bc3c1ccb0c3 100644
--- a/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift
+++ b/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift
@@ -51,9 +51,9 @@ func makeRecordBatch() throws -> RecordBatch {
     date32Builder.append(date2)
     date32Builder.append(date1)
     date32Builder.append(date2)
-    let doubleHolder = ArrowArrayHolder(try doubleBuilder.finish())
-    let stringHolder = ArrowArrayHolder(try stringBuilder.finish())
-    let date32Holder = ArrowArrayHolder(try date32Builder.finish())
+    let doubleHolder = ArrowArrayHolderImpl(try doubleBuilder.finish())
+    let stringHolder = ArrowArrayHolderImpl(try stringBuilder.finish())
+    let date32Holder = ArrowArrayHolderImpl(try date32Builder.finish())
     let result = RecordBatch.Builder()
         .addColumn("col1", arrowArray: doubleHolder)
         .addColumn("col2", arrowArray: stringHolder)
diff --git a/swift/CDataWGo/.gitignore b/swift/CDataWGo/.gitignore
new file mode 100644
index 0000000000000..0023a53406379
--- /dev/null
+++ b/swift/CDataWGo/.gitignore
@@ -0,0 +1,8 @@
+.DS_Store
+/.build
+/Packages
+xcuserdata/
+DerivedData/
+.swiftpm/configuration/registries.json
+.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
+.netrc
diff --git a/swift/CDataWGo/Package.swift b/swift/CDataWGo/Package.swift
new file mode 100644
index 0000000000000..64d29aec6b845
--- /dev/null
+++ b/swift/CDataWGo/Package.swift
@@ -0,0 +1,43 @@
+// swift-tools-version: 5.9
+// The swift-tools-version declares the minimum version of Swift required to build this package.
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import PackageDescription
+
+let package = Package(
+    name: "go-swift",
+    platforms: [
+        .macOS(.v10_14)
+    ],
+    products: [
+        .library(
+            name: "go-swift",
+            type: .static,
+            targets: ["go-swift"])
+    ],
+    dependencies: [
+        .package(path: "../Arrow") // 👈 Reference to a Local Package
+    ],
+    targets: [
+        .target(
+            name: "go-swift",
+            dependencies: [
+                .product(name: "Arrow", package: "Arrow")
+            ])
+    ]
+)
diff --git a/swift/CDataWGo/Sources/go-swift/CDataTest.swift b/swift/CDataWGo/Sources/go-swift/CDataTest.swift
new file mode 100644
index 0000000000000..b38ca7240ab60
--- /dev/null
+++ b/swift/CDataWGo/Sources/go-swift/CDataTest.swift
@@ -0,0 +1,132 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Arrow
+import ArrowC
+
+@_cdecl("stringTypeFromSwift")
+func stringTypeFromSwift(cSchema: UnsafePointer<ArrowC.ArrowSchema>) {
+    let unsafePointer = UnsafeMutablePointer(mutating: cSchema)
+    let exporter = ArrowCExporter()
+    switch exporter.exportType(&unsafePointer.pointee, arrowType: ArrowType(ArrowType.ArrowString), name: "col1") {
+    case .success:
+        return
+    case .failure(let err):
+        fatalError("Error exporting string type from swift: \(err)")
+    }
+}
+
+@_cdecl("stringTypeToSwift")
+func stringTypeToSwift(cSchema: UnsafePointer<ArrowC.ArrowSchema>) {
+    let importer = ArrowCImporter()
+    switch importer.importField(cSchema.pointee) {
+    case .success(let field):
+        if field.name != "col1" {
+            fatalError("Field name was incorrect expected: col1 but found: \(field.name)")
+        }
+
+        if field.type.id != ArrowTypeId.string {
+            fatalError("Field type was incorrect expected: string but found: \(field.type.id)")
+        }
+    case .failure(let err):
+        fatalError("Error importing string type to swift: \(err)")
+    }
+}
+
+@_cdecl("arrayIntFromSwift")
+func arrayIntFromSwift(cArray: UnsafePointer<ArrowC.ArrowArray>) {
+    do {
+        let unsafePointer = UnsafeMutablePointer(mutating: cArray)
+        let arrayBuilder: NumberArrayBuilder<Int32> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        for index in 0..<100 {
+            arrayBuilder.append(Int32(index))
+        }
+
+        let array = try arrayBuilder.finish()
+        let exporter = ArrowCExporter()
+        exporter.exportArray(&unsafePointer.pointee, arrowData: array.arrowData)
+    } catch let err {
+        fatalError("Error exporting array from swift \(err)")
+    }
+}
+
+@_cdecl("arrayStringFromSwift")
+func arrayStringFromSwift(cArray: UnsafePointer<ArrowC.ArrowArray>) {
+    do {
+        let unsafePointer = UnsafeMutablePointer(mutating: cArray)
+        let arrayBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
+        for index in 0..<100 {
+            arrayBuilder.append("test" + String(index))
+        }
+
+        let array = try arrayBuilder.finish()
+        let exporter = ArrowCExporter()
+        exporter.exportArray(&unsafePointer.pointee, arrowData: array.arrowData)
+    } catch let err {
+        fatalError("Error exporting array from swift \(err)")
+    }
+}
+
+@_cdecl("arrayIntToSwift")
+func arrayIntToSwift(cArray: UnsafePointer<ArrowC.ArrowArray>) {
+    let importer = ArrowCImporter()
+    switch importer.importArray(cArray, arrowType: ArrowType(ArrowType.ArrowInt32)) {
+    case .success(let int32Holder):
+        let result = RecordBatch.Builder()
+            .addColumn("col1", arrowArray: int32Holder)
+            .finish()
+        switch result {
+        case .success(let recordBatch):
+            let col1: Arrow.ArrowArray<Int32> = recordBatch.data(for: 0)
+            for index in 0..<col1.length {
+                if col1[index] != Int32(index + 1) {
+                    fatalError("Array value is incorrect")
+                }
+            }
+        case .failure(let err):
+            fatalError("Error building RecordBatch in swift: \(err)")
+        }
+    case .failure(let err):
+        fatalError("Error importing int array to swift: \(err)")
+    }
+}
+
+@_cdecl("arrayStringToSwift")
+func arrayStringToSwift(cArray: UnsafePointer<ArrowC.ArrowArray>) {
+    let importer = ArrowCImporter()
+    switch importer.importArray(cArray, arrowType: ArrowType(ArrowType.ArrowString)) {
+    case .success(let dataHolder):
+        let result = RecordBatch.Builder()
+            .addColumn("col1", arrowArray: dataHolder)
+            .finish()
+        switch result {
+        case .success(let recordBatch):
+            let col1: Arrow.ArrowArray<String> = recordBatch.data(for: 0)
+            for index in 0..<col1.length {
+                if col1[index] != "test" + String(index) {
+                    fatalError("Array value is incorrect")
+                }
+            }
+        case .failure(let err):
+            fatalError("Error building RecordBatch in swift: \(err)")
+        }
+    case .failure(let err):
+        fatalError("Error importing string array to swift: \(err)")
+    }
+}
diff --git a/swift/CDataWGo/main.go b/swift/CDataWGo/main.go
new file mode 100644
--- /dev/null
+++ b/swift/CDataWGo/main.go
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package main
+
+/*
+#include <stdlib.h>
+#include "go_swift.h"
+*/
+import "C"
+import (
+	"strconv"
+	"unsafe"
+
+	"github.com/apache/arrow/go/v16/arrow"
+	"github.com/apache/arrow/go/v16/arrow/array"
+	"github.com/apache/arrow/go/v16/arrow/cdata"
+	"github.com/apache/arrow/go/v16/arrow/memory"
+)
+
+func stringTypeFromSwift() {
+	arrowSchema := &cdata.CArrowSchema{}
+	swSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(arrowSchema))
+	C.stringTypeFromSwift(swSchema)
+	gofield, _ := cdata.ImportCArrowField(arrowSchema)
+	if gofield.Name != "col1" {
+		panic("Imported type has incorrect name")
+	}
+}
+
+func stringTypeToSwift() {
+	arrowSchema := &cdata.CArrowSchema{}
+	swSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(arrowSchema))
+	C.stringTypeFromSwift(swSchema)
+	// Hand the populated schema to the Swift side as well, so the Swift
+	// import path (stringTypeToSwift) is actually exercised.
+	C.stringTypeToSwift(swSchema)
+	gofield, _ := cdata.ImportCArrowField(arrowSchema)
+	if gofield.Name != "col1" {
+		panic("Imported type has incorrect name")
+	}
+}
+
+func arrayStringFromSwift() {
+	arrowArray := &cdata.CArrowArray{}
+	swarray := (*C.struct_ArrowArray)(unsafe.Pointer(arrowArray))
+	C.arrayStringFromSwift(swarray)
+	arr, _ := cdata.ImportCArrayWithType(arrowArray, arrow.BinaryTypes.String)
+	if arr.Len() != 100 {
+		panic("Array length is incorrect")
+	}
+
+	for i := 0; i < 100; i++ {
+		if arr.ValueStr(i) != ("test" + strconv.Itoa(i)) {
+			panic("Array value is incorrect")
+		}
+	}
+}
+
+func arrayIntFromSwift() {
+	arrowArray := &cdata.CArrowArray{}
+	swarray := (*C.struct_ArrowArray)(unsafe.Pointer(arrowArray))
+	C.arrayIntFromSwift(swarray)
+	arr, _ := cdata.ImportCArrayWithType(arrowArray, arrow.PrimitiveTypes.Int32)
+	if arr.Len() != 100 {
+		panic("Array length is incorrect")
+	}
+
+	vals := arr.(*array.Int32).Int32Values()
+	// and that the values are correct
+	for i, v := range vals {
+		if v != int32(i) {
+			panic("Array value is incorrect")
+		}
+	}
+}
+
+func arrayIntToSwift() {
+	bld := array.NewUint32Builder(memory.DefaultAllocator)
+	defer bld.Release()
+	bld.AppendValues([]uint32{1, 2, 3, 4}, []bool{true, true, true, true})
+	goarray := bld.NewUint32Array()
+	var carray cdata.CArrowArray
+	cdata.ExportArrowArray(goarray, &carray, nil)
+	swarray := (*C.struct_ArrowArray)(unsafe.Pointer(&carray))
+	C.arrayIntToSwift(swarray)
+
+	if swarray.release != nil {
+		panic("Release was not called by swift to deallocate C array")
+	}
+}
+
+func arrayStringToSwift() {
+	bld := array.NewStringBuilder(memory.DefaultAllocator)
+	defer bld.Release()
+	bld.AppendValues([]string{"test0", "test1", "test2", "test3"}, []bool{true, true, true, true})
+	goarray := bld.NewStringArray()
+	var carray cdata.CArrowArray
+	cdata.ExportArrowArray(goarray, &carray, nil)
+	swarray := (*C.struct_ArrowArray)(unsafe.Pointer(&carray))
+	C.arrayStringToSwift(swarray)
+
+	if swarray.release != nil {
+		panic("Release was not called by swift to deallocate C array")
+	}
+}
+
+func main() {
+	stringTypeFromSwift()
+	stringTypeToSwift()
+	arrayStringFromSwift()
+	arrayIntFromSwift()
+	arrayIntToSwift()
+	arrayStringToSwift()
+}
diff --git a/swift/data-generator/swift-datagen/main.go b/swift/data-generator/swift-datagen/main.go
index a60fb562932fe..2f2e244ab5891 100644
--- a/swift/data-generator/swift-datagen/main.go
+++ b/swift/data-generator/swift-datagen/main.go
@@ -22,8 +22,8 @@ import (
 
 	"github.com/apache/arrow/go/v12/arrow"
 	"github.com/apache/arrow/go/v12/arrow/array"
-	"github.com/apache/arrow/go/v12/arrow/memory"
 	"github.com/apache/arrow/go/v12/arrow/ipc"
+	"github.com/apache/arrow/go/v12/arrow/memory"
 )
 
 func writeBytes(rec arrow.Record, file_name string) {
@@ -42,7 +42,6 @@ func writeBytes(rec arrow.Record, file_name string) {
 	rr.Close()
 }
 
-
 func writeBoolData() {
 	alloc := memory.NewGoAllocator()
 	schema := arrow.NewSchema([]arrow.Field{
@@ -53,14 +52,13 @@ func writeBoolData() {
 	b := array.NewRecordBuilder(alloc, schema)
 	defer b.Release()
 
-	b.Field(0).(*array.BooleanBuilder).AppendValues([]bool{true, false,}, nil)
+	b.Field(0).(*array.BooleanBuilder).AppendValues([]bool{true, false}, nil)
 	b.Field(0).(*array.BooleanBuilder).AppendNull()
-	b.Field(0).(*array.BooleanBuilder).AppendValues([]bool{false, true,}, nil)
+	b.Field(0).(*array.BooleanBuilder).AppendValues([]bool{false, true}, nil)
 	b.Field(1).(*array.StringBuilder).AppendValues([]string{"zero", "one", "two", "three", "four"}, nil)
 
 	rec := b.NewRecord()
 	defer rec.Release()
-
 	writeBytes(rec, "testdata_bool.arrow")
 }
 
@@ -81,11 +79,10 @@ func writeDoubleData() {
 	rec := b.NewRecord()
 	defer rec.Release()
-
 	writeBytes(rec, "testdata_double.arrow")
 }
 
 func main() {
-	writeBoolData();
-	writeDoubleData();
+	writeBoolData()
+	writeDoubleData()
 }